PEG.js is a parser generator for JavaScript that produces parsers.
PEG.js generates a parser from a Parsing Expression Grammar describing a language.
We can specify what the parser returns (using semantic actions on matched parts of the input).
To use the pegjs command, install PEG.js globally:
$ npm install -g pegjs
To use the JavaScript API, install PEG.js locally:
$ npm install pegjs
To use it from the browser, download the PEG.js library (regular or minified version).
[~/srcPLgrado/pegjs/examples(master)]$ pegjs --help Usage: pegjs [options] [--] [<input_file>] [<output_file>] Generates a parser from the PEG grammar specified in the <input_file> and writes it to the <output_file>. If the <output_file> is omitted, its name is generated by changing the <input_file> extension to ".js". If both <input_file> and <output_file> are omitted, standard input and output are used. Options: -e, --export-var <variable> name of the variable where the parser object will be stored (default: "module.exports") --cache make generated parser cache results --allowed-start-rules <rules> comma-separated list of rules the generated parser will be allowed to start parsing from (default: the first rule in the grammar) -o, --optimize <goal> select optimization for speed or size (default: speed) --plugin <plugin> use a specified plugin (can be specified multiple times) --extra-options <options> additional options (in JSON format) to pass to PEG.buildParser --extra-options-file <file> file with additional options (in JSON format) to pass to PEG.buildParser -v, --version print version information and exit -h, --help print help and exit
[~/srcPLgrado/pegjs/examples(master)]$ node > PEG = require("pegjs") { VERSION: '0.8.0', GrammarError: [Function], parser: { SyntaxError: [Function: SyntaxError], parse: [Function: parse] }, compiler: { passes: { check: [Object], transform: [Object], generate: [Object] }, compile: [Function] }, buildParser: [Function] }
> parser = PEG.buildParser("start = ('a' / 'b')+") { SyntaxError: [Function: SyntaxError], parse: [Function: parse] }
Using the generated parser is simple — just call its parse method and pass an input string as a parameter.
The method will return a parse result (the exact value depends on the grammar used to build the parser) or throw an exception if the input is invalid.
The exception will contain offset, line, column, expected, found and message properties with more details about the error.
You can tweak the parser's behavior by passing a second parameter with an options object to the parse method.
Only one option is currently supported: startRule, which is the name of the rule to start parsing from.
> parser.parse("abba"); [ 'a', 'b', 'b', 'a' ] >
[~/srcPLgrado/pegjs/examples(master)]$ cat allowedstartrules.js var PEG = require("pegjs"); var grammar = "a = 'hello' b\nb = 'world'"; //"a = 'hello' b\nb='world'; console.log(grammar); // a = 'hello' b // b = 'world' var parser = PEG.buildParser(grammar,{ allowedStartRules: ['a', 'b'] }); var r = parser.parse("helloworld", { startRule: 'a' }); console.log(r); // [ 'hello', 'world' ] r = parser.parse("helloworld") console.log(r); // [ 'hello', 'world' ] r = parser.parse("world", { startRule: 'b' }) console.log(r); // 'world' try { r = parser.parse("world"); // Throws an exception } catch(e) { console.log("Error!!!!"); console.log(e); }
[~/srcPLgrado/pegjs/examples(master)]$ node allowedstartrules.js a = 'hello' b b = 'world' [ 'hello', 'world' ] [ 'hello', 'world' ] world Error!!!! { message: 'Expected "hello" but "w" found.', expected: [ { type: 'literal', value: 'hello', description: '"hello"' } ], found: 'w', offset: 0, line: 1, column: 1, name: 'SyntaxError' }
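The exception contains message, expected, found, offset, line and column properties describing the error, as the output above shows.
When calling PEG.buildParser, if the output option is set to "parser", the method will return the generated parser object: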
> PEG = require("pegjs") > grammar = "a = 'hello' b\nb='world'" 'a = \'hello\' b\nb=\'world\'' > console.log(grammar) a = 'hello' b b='world' > parser = PEG.buildParser(grammar,{ output: "parser"}) { SyntaxError: [Function: SyntaxError], parse: [Function: parse] } > parser.parse("helloworld") [ 'hello', 'world' ]
> parser = PEG.buildParser(grammar,{ output: "source"}) > typeof parser 'string' > console.log(parser.substring(0,100)) (function() { /* * Generated by PEG.js 0.8.0. * * http://pegjs.majda.cz/ */
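If the output option is set to "source", PEG.buildParser returns the parser source code as a string instead of a parser object (default: "parser").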
$ pegjs --plugin pegjs-coffee-plugin remove_left_recursive.pegjs
PEGjs Coffee Plugin is a plugin for PEG.js to use CoffeeScript in actions.
[~/srcPLgrado/pegjs_examples(master)]$ cat plugin.coffee #!/usr/bin/env coffee PEG = require 'pegjs' coffee = require 'pegjs-coffee-plugin' grammar = """ a = 'hello' _ b { console.log 3; "hello world!" } b = 'world' { console.log 2 } _ = [ \t]+ { console.log 1 } """ parser = PEG.buildParser grammar, plugins: [coffee] r = parser.parse "hello world" console.log("r = #{r}")
La ejecución nos muestra además el orden de abajo a arriba y de izquierda a derecha en la ejecución de las acciones semánticas:
[~/srcPLgrado/pegjs_examples(master)]$ coffee plugin.coffee 1 2 3 r = hello world!
[~/Dropbox/src/javascript/PLgrado/pegjs/examples(master)]$ npm install -g pegjs-coffee-plugin
[~/Dropbox/src/javascript/PLgrado/pegjs-coffee-plugin/examples(master)]$ cat simple.pegjs { @reduce = (left, right)-> sum = left for t in right op = t[0] num = t[1] switch op when '+' then sum += num; break when '-' then sum -= num; break when '*' then sum *= num; break when '/' then sum /= num; break else console.log("Error! "+op) sum } sum = left:product right:([+-] product)* { @reduce(left, right); } product = left:value right:([*/] value)* { @reduce(left, right); } value = number:[0-9]+ { parseInt(number.join(''),10) } / '(' sum:sum ')' { sum }
[~/Dropbox/src/javascript/PLgrado/pegjs-coffee-plugin/examples(master)]$ cat use_simple.coffee PEG = require("./simple.js") r = PEG.parse("2+3*(2+1)-10/2") console.log(r)
[~/srcPLgrado/pegjs/examples/coffee-plugin-examples(master)]$ pegjs --plugin pegjs-coffee-plugin simple.pegjs [~/srcPLgrado/pegjs/examples/coffee-plugin-examples(master)]$ coffee use_simple.coffee input = 3-1-2 result = 0 input = 2+3*(2+1)-10/2 result = 6
cache: if true, makes the parser cache results, avoiding exponential parsing time in pathological cases but making the parser slower (default: false).
optimize: selects between optimizing the generated parser for parsing speed ("speed") or code size ("size") (default: "speed").
Casiano Rodríguez León