PEG.js is a parser generator for JavaScript that produces parsers.
PEG.js generates a parser from a Parsing Expression Grammar describing a language.
We can specify what the parser returns (using semantic actions on matched parts of the input).
To use the pegjs command, install PEG.js globally:
$ npm install -g pegjs
To use the JavaScript API, install PEG.js locally:
$ npm install pegjs
To use it from the browser, download the PEG.js library (regular or minified version).
[~/srcPLgrado/pegjs/examples(master)]$ pegjs --help
Usage: pegjs [options] [--] [<input_file>] [<output_file>]
Generates a parser from the PEG grammar specified in the <input_file> and writes
it to the <output_file>.
If the <output_file> is omitted, its name is generated by changing the
<input_file> extension to ".js". If both <input_file> and <output_file> are
omitted, standard input and output are used.
Options:
-e, --export-var <variable> name of the variable where the parser
object will be stored (default:
"module.exports")
--cache make generated parser cache results
--allowed-start-rules <rules> comma-separated list of rules the generated
parser will be allowed to start parsing
from (default: the first rule in the
grammar)
-o, --optimize <goal> select optimization for speed or size
(default: speed)
--plugin <plugin> use a specified plugin (can be specified
multiple times)
--extra-options <options> additional options (in JSON format) to pass
to PEG.buildParser
--extra-options-file <file> file with additional options (in JSON
format) to pass to PEG.buildParser
-v, --version print version information and exit
-h, --help print help and exit
[~/srcPLgrado/pegjs/examples(master)]$ node
> PEG = require("pegjs")
{ VERSION: '0.8.0',
GrammarError: [Function],
parser:
{ SyntaxError: [Function: SyntaxError],
parse: [Function: parse] },
compiler:
{ passes:
{ check: [Object],
transform: [Object],
generate: [Object] },
compile: [Function] },
buildParser: [Function] }
> parser = PEG.buildParser("start = ('a' / 'b')+")
{ SyntaxError: [Function: SyntaxError],
parse: [Function: parse] }
Using the generated parser is simple — just call its parse method
and pass an input string as a parameter.
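The method will return a parse result (the exact value depends on the grammar
used to build the parser) or throw an exception if the input is invalid.
The exception will contain
offset, line, column, expected, found and message
properties with more details about the error.
You can tweak the parser's behavior by passing a second parameter with an
options object to the parse method.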
Only one option is currently supported:
startRule which is the name of the rule to start parsing from.
> parser.parse("abba");
[ 'a', 'b', 'b', 'a' ]
>
[~/srcPLgrado/pegjs/examples(master)]$ cat allowedstartrules.js
var PEG = require("pegjs");
var grammar = "a = 'hello' b\nb = 'world'"; //"a = 'hello' b\nb='world';
console.log(grammar); // a = 'hello' b
// b = 'world'
var parser = PEG.buildParser(grammar,{ allowedStartRules: ['a', 'b'] });
var r = parser.parse("helloworld", { startRule: 'a' });
console.log(r); // [ 'hello', 'world' ]
r = parser.parse("helloworld")
console.log(r); // [ 'hello', 'world' ]
r = parser.parse("world", { startRule: 'b' })
console.log(r); // 'world'
try {
r = parser.parse("world"); // Throws an exception
}
catch(e) {
console.log("Error!!!!");
console.log(e);
}
[~/srcPLgrado/pegjs/examples(master)]$ node allowedstartrules.js
a = 'hello' b
b = 'world'
[ 'hello', 'world' ]
[ 'hello', 'world' ]
world
Error!!!!
{ message: 'Expected "hello" but "w" found.',
expected: [ { type: 'literal', value: 'hello', description: '"hello"' } ],
found: 'w',
offset: 0,
line: 1,
column: 1,
name: 'SyntaxError' }
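The exception contains the message, expected, found, offset, line and column
properties printed above, with more details about the error.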
PEG.buildParser accepts an output option. If output is set to "parser",
the method returns the generated parser object:
> PEG = require("pegjs")
> grammar = "a = 'hello' b\nb='world'"
'a = \'hello\' b\nb=\'world\''
> console.log(grammar)
a = 'hello' b
b='world'
> parser = PEG.buildParser(grammar,{ output: "parser"})
{ SyntaxError: [Function: SyntaxError],
parse: [Function: parse] }
> parser.parse("helloworld")
[ 'hello', 'world' ]
> parser = PEG.buildParser(grammar,{ output: "source"})
> typeof parser
'string'
> console.log(parser.substring(0,100))
(function() {
/*
* Generated by PEG.js 0.8.0.
*
* http://pegjs.majda.cz/
*/
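If output is set to "source", the method returns the parser source code as a
string instead of a parser object (default: "parser").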
$ pegjs --plugin pegjs-coffee-plugin remove_left_recursive.pegjs
PEGjs Coffee Plugin is a plugin for PEG.js to use CoffeeScript in actions.
[~/srcPLgrado/pegjs_examples(master)]$ cat plugin.coffee
#!/usr/bin/env coffee
PEG = require 'pegjs'
coffee = require 'pegjs-coffee-plugin'
grammar = """
a = 'hello' _ b { console.log 3; "hello world!" }
b = 'world' { console.log 2 }
_ = [ \t]+ { console.log 1 }
"""
parser = PEG.buildParser grammar, plugins: [coffee]
r = parser.parse "hello world"
console.log("r = #{r}")
La ejecución nos muestra además el orden de abajo-arriba
y de izquierda-derecha en la ejecución de las acciones
semánticas:
[~/srcPLgrado/pegjs_examples(master)]$ coffee plugin.coffee
1
2
3
r = hello world!
[~/Dropbox/src/javascript/PLgrado/pegjs/examples(master)]$ npm install -g pegjs-coffee-plugin
[~/Dropbox/src/javascript/PLgrado/pegjs-coffee-plugin/examples(master)]$ cat simple.pegjs
{
@reduce = (left, right)->
sum = left
for t in right
op = t[0]
num = t[1]
switch op
when '+' then sum += num; break
when '-' then sum -= num; break
when '*' then sum *= num; break
when '/' then sum /= num; break
else console.log("Error! "+op)
sum
}
sum = left:product right:([+-] product)* { @reduce(left, right); }
product = left:value right:([*/] value)* { @reduce(left, right); }
value = number:[0-9]+ { parseInt(number.join(''),10) }
/ '(' sum:sum ')' { sum }
[~/Dropbox/src/javascript/PLgrado/pegjs-coffee-plugin/examples(master)]$ cat use_simple.coffee
PEG = require("./simple.js")
r = PEG.parse("2+3*(2+1)-10/2")
console.log(r)
[~/srcPLgrado/pegjs/examples/coffee-plugin-examples(master)]$ pegjs --plugin pegjs-coffee-plugin simple.pegjs
[~/srcPLgrado/pegjs/examples/coffee-plugin-examples(master)]$ coffee use_simple.coffee
input = 3-1-2 result = 0
input = 2+3*(2+1)-10/2 result = 6
cache: if true, makes the parser cache results, avoiding exponential parsing
time in pathological cases but making the parser slower (default:
false).
optimize: selects between optimizing the generated parser for parsing speed
("speed") or code size ("size") (default: "speed").
Casiano Rodríguez León