primary) that
identifies the rule, and a
integer / LEFTPAR additive:additive RIGHTPAR { return additive; }
that defines a pattern to match
against the input text and possibly contains some JavaScript code that
determines what happens when the pattern matches successfully.
integer rule has a human-readable name).
integer "integer" = NUMBER
=) and a parsing expression.
integer "integer" = NUMBER
;) after the parsing expression
is allowed.
{ and }).
& { predicate } and ! { predicate }
are called semantic predicates)
options variable.
[~/srcPLgrado/pegjs_examples(master)]$ cat initializer.js
// Demonstrates a PEG.js grammar initializer: the leading { ... } block of the
// grammar runs before parsing starts, and what it declares is visible to the
// actions and semantic predicates that follow.
var PEG = require("pegjs");
// The grammar source, one array element per line. The first six elements are
// the initializer block; the last two define the single rule `start`.
var grammar = [
' { ',
' util = require("util"); ',
' ',
' var g = "visible variable"; ',
' console.log("Inside Initializer! options = "+util.inspect(options)); ',
' } ',
" start = 'a' { console.log(g); return 1; } ",
" / & { console.log('inside predicate: '+g); return true; } 'b' { return 2; }"
];
// Join the pieces into one grammar string and echo it for reference.
grammar = grammar.join('\n');
console.log("GRAMMAR:\n"+grammar);
var parser = PEG.buildParser(grammar);
// The second argument of parse() is what the grammar code sees as `options`.
var r = parser.parse("a", { x: 'hello' });
console.log(r);
// No options object is passed on this call; input "b" exercises the second
// alternative, i.e. the semantic predicate followed by 'b'.
r = parser.parse("b");
console.log(r);
Produces the following output:
[~/srcPLgrado/pegjs_examples(master)]$ node initializer.js
GRAMMAR:
{
util = require("util");
var g = "visible variable";
console.log("Inside Initializer! options = "+util.inspect(options));
}
start = 'a' { console.log(g); return 1; }
/ & { console.log('inside predicate: '+g); return true; } 'b' { return 2; }
Inside Initializer! options = { x: 'hello' }
visible variable
1
Inside Initializer! options = {}
inside predicate: visible variable
2
coffee-pegjs-plugin
You have a safe scope shared between all actions and predicates.
initializer are added.
delete myObject.property)
as much as you like.
initializer.
start = a { @result }
a = "a" { @result = "awesome" }
And this will correctly return awesome if you call parse("a").
{ result = "awesome" } becomes
{ var result; result = "awesome" }
this and the security to just assign variables for local
use like you are used to when writing CoffeeScript.
[~/srcPLgrado/pegjs_examples(master)]$ cat initializer.coffee
PEG = require('pegjs')
coffee = require 'pegjs-coffee-plugin'
grammar = '''
{
util = require("util")
@g = "visible variable"
console.log("Inside Initializer! options = "+util.inspect(options))
}
start = 'a' { console.log(@g); 1 }
/ & {
console.log("inside predicate: '#{@g}''")
true
} 'b' { 2 }
'''
parser = PEG.buildParser(grammar, plugins: [coffee])
r = parser.parse('a', x: 'hello')
console.log r
r = parser.parse('b')
console.log r
[~/srcPLgrado/pegjs_examples(master)]$ coffee initializer.coffee
Inside Initializer! options = { x: 'hello' }
visible variable
1
Inside Initializer! options = {}
inside predicate: 'visible variable''
2
The parsing expressions of the rules are used to match the input text to the grammar.
There are various types of expressions — matching characters or character classes, indicating optional parts and repetition, etc.
Expressions can also contain references to other rules.
If an expression successfully matches a part of the text when running the generated parser, it produces a match result, which is a JavaScript value.
One special case of parser expression is a parser action — a piece of
JavaScript code inside curly braces ({ and }) that
takes match
results of some of the preceding expressions and returns a JavaScript
value.
This value is considered the match result of the preceding expression (in other words, the parser action is a match result transformer).
In our arithmetics example, there are many parser actions.
Consider this action:
digits:[0-9]+ { return parseInt(digits.join(""), 10); }
[0-9]+,
which is an
array of strings containing digits, as its parameter.
"literal" 'literal'Match exact literal string and return it. The string syntax is the same as in JavaScript.
Appending i right after the literal makes the match case-insensitive:
[~/srcPLgrado/pegjs_examples(master)]$ cat ignorecase.coffee
PEG = require('pegjs')
coffee = require 'pegjs-coffee-plugin'
grammar = '''
start = a:'a'i
'''
parser = PEG.buildParser(grammar, plugins: [coffee])
r = parser.parse('A')
console.log r
parser = PEG.buildParser(grammar, plugins: [coffee])
r = parser.parse('a')
console.log r
when executed produces:
[~/srcPLgrado/pegjs_examples(master)]$ coffee ignorecase.coffee
A
a
.
Match exactly one character and return it as a string:
[~/srcPLgrado/pegjs_examples(master)]$ cat dot.coffee
PEG = require('pegjs')
coffee = require 'pegjs-coffee-plugin'
grammar = '''
start = a: ..
'''
parser = PEG.buildParser(grammar, plugins: [coffee])
r = parser.parse('Ab')
console.log r
parser = PEG.buildParser(grammar, plugins: [coffee])
r = parser.parse("\n\t")
console.log r
When executed produces:
[~/srcPLgrado/pegjs_examples(master)]$ coffee dot.coffee
[ 'A', 'b' ]
[ '\n', '\t' ]
[characters]
[a-z]
means all lowercase letters).
^ inverts the matched set (e.g. [^a-z] means "all characters but
lowercase letters").
i right after the literal makes the
match case-insensitive.
[~/srcPLgrado/pegjs_examples(master)]$ cat regexp.coffee
PEG = require('pegjs')
coffee = require 'pegjs-coffee-plugin'
grammar = '''
start = a: [aeiou\u2661]i . [^x-z]
'''
parser = PEG.buildParser(grammar, plugins: [coffee])
r = parser.parse('Abr')
console.log r
r = parser.parse('♡br')
console.log r
[~/srcPLgrado/pegjs_examples(master)]$ coffee regexp.coffee
[ 'A', 'b', 'r' ]
[ '♡', 'b', 'r' ]
rule
Match a parsing expression of a rule recursively and return its match result.
( expression )
Match a subexpression and return its match result.
expression *
Match zero or more repetitions of the expression and return their match results in an array. The matching is greedy, i.e. the parser tries to match the expression as many times as possible.
expression +
Match one or more repetitions of the expression and return their match results in an array. The matching is greedy, i.e. the parser tries to match the expression as many times as possible.
expression ?
Try to match the expression. If the match succeeds, return its match result, otherwise return
null.
& expression
Try to match the expression.
If the match succeeds, just return undefined and do not advance
the parser position, otherwise consider the match failed.
! expression
Try to match the expression. If the match does not succeed, just return
undefined and do not advance the parser position, otherwise consider
the match failed.
[~/srcPLgrado/pegjs/examples(master)]$ cat notpredicate.pegjs
__ = (whitespace / eol / comment)*
/* Modeled after ECMA-262, 5th ed., 7.4. */
comment "comment"
= singleLineComment
/ multiLineComment
singleLineComment
= "//" (!eolChar .)* { return text(); }
multiLineComment
= "/*" (!"*/" .)* "*/" { return text(); }
/* Modeled after ECMA-262, 5th ed., 7.3. */
eol "end of line"
= "\n"
/ "\r\n"
/ "\r"
/ "\u2028"
/ "\u2029"
eolChar
= [\n\r\u2028\u2029]
whitespace "whitespace"
= [ \t\v\f\u00A0\uFEFF\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]
[~/srcPLgrado/pegjs/examples(master)]$ cat mainnotpredicate.js
// Driver for the comment grammar: loads the parser module generated from
// notpredicate.pegjs by the pegjs command-line tool and runs it on a sample.
var PEG = require("./notpredicate.js");
// Input comes from the first command-line argument; when none is given, fall
// back to a sample with two single-line comments and one multi-line comment.
var input = process.argv[2] || "// one comment\n"+
"// another comment \t/\n"+
"/* a\n"+
" third comment */";
// Echo the input between marker lines so it can be compared with the result.
console.log("\n*****\n"+input+"\n*****\n");
var r = PEG.parse(input);
console.log(r);
[~/srcPLgrado/pegjs/examples(master)]$ pegjs notpredicate.pegjs
[~/srcPLgrado/pegjs/examples(master)]$ node mainnotpredicate.js

*****
// one comment
// another comment 	/
/* a
 third comment */
*****

[ '// one comment', '\n', '// another comment \t/', '\n', '/* a\n third comment */' ]
& { predicate }
return statement.
true in boolean context, just return undefined and do not advance the
parser position; otherwise consider the match failed.
offset function.
The offset function returns a zero-based character index into the input string.
line and column
functions.
Both return one-based indexes.
options variable.
[~/srcPLgrado/pegjs_examples(master)]$ cat semantic_predicate.coffee
PEG = require('pegjs')
coffee = require 'pegjs-coffee-plugin'
grammar = '''
{
@util = require("util")
@g = "visible variable"
console.log("Inside Initializer! options = "+@util.inspect(options))
}
start = 'a' { console.log(@g); 1 }
/ c:'c' '\\n' & {
console.log("inside predicate: @g = '#{@g}' c = '#{c}'")
console.log("options = #{@util.inspect(options)}")
console.log("offset = #{offset()}")
console.log("line = #{line()}")
console.log("column = #{column()}")
true
} 'b' { 2 }
'''
parser = PEG.buildParser(grammar, plugins: [coffee])
r = parser.parse('a', x: 'hello')
console.log r
r = parser.parse("c\nb", y : 'world')
console.log r
When executed produces the following output:
[~/srcPLgrado/pegjs_examples(master)]$ coffee semantic_predicate.coffee
Inside Initializer! options = { x: 'hello' }
visible variable
1
Inside Initializer! options = { y: 'world' }
inside predicate: @g = 'visible variable' c = 'c'
options = { y: 'world' }
offset = 2
line = 2
column = 1
2
! { predicate }
return statement.
false in boolean context, just return undefined and do not advance the
parser position; otherwise consider the match failed.
offset function.
The offset function returns a zero-based character index into the input string.
line and column functions.
Both return one-based indexes.
options variable.
$ expression
Try to match the expression. If the match succeeds, return the matched string instead of the match result.
label : expression
label.
expression1 expression2 ... expressionn
Match a sequence of expressions and return their match results in an array.
expression { action }
action,
otherwise consider the match failed.
action is a piece of JavaScript code that is executed as if it was
inside a function.
return statement.
expected
function, which makes the parser throw an exception.
The function takes
one parameter — a description of what was expected at the current
position. This description will be used as part of a message of the
thrown exception.
error function, which also
makes the parser throw an exception. The function takes one parameter
— an error message. This message will be used by the thrown exception.
text function.
offset function.
It returns a zero-based character index into the input string.
line and column
functions. Both return one-based indexes.
options
variable.
expression1 / expression2 / ... / expressionn
Try to match the first expression, if it does not succeed, try the second one, etc. Return the match result of the first successfully matched expression. If no expression matches, consider the match failed.
[~/srcPLgrado/pegjs(master)]$ cat src/parser.pegjs
grammar
= __ initializer? rule+
initializer
= action semicolon?
rule
= identifier string? equals expression semicolon?
expression
= choice
choice
= sequence (slash sequence)*
sequence
= labeled* action
/ labeled*
labeled
= identifier colon prefixed
/ prefixed
prefixed
= dollar suffixed
/ and action
/ and suffixed
/ not action
/ not suffixed
/ suffixed
suffixed
= primary question
/ primary star
/ primary plus
/ primary
primary
= identifier !(string? equals)
/ literal
/ class
/ dot
/ lparen expression rparen
/* "Lexical" elements */
action "action"
= braced __
braced
= "{" (braced / nonBraceCharacters)* "}"
nonBraceCharacters
= nonBraceCharacter+
nonBraceCharacter
= [^{}]
equals = "=" __
colon = ":" __
semicolon = ";" __
slash = "/" __
and = "&" __
not = "!" __
dollar = "$" __
question = "?" __
star = "*" __
plus = "+" __
lparen = "(" __
rparen = ")" __
dot = "." __
/*
* Modeled after ECMA-262, 5th ed., 7.6, but much simplified:
*
* * no Unicode escape sequences
*
* * "Unicode combining marks" and "Unicode connection punctuation" can't be
* part of the identifier
*
* * only [a-zA-Z] is considered a "Unicode letter"
*
* * only [0-9] is considered a "Unicode digit"
*
* The simplifications were made just to make the implementation little bit
* easier, there is no "philosophical" reason behind them.
*
* Contrary to ECMA 262, the "$" character is not valid because it serves other
* purpose in the grammar.
*/
identifier "identifier"
= (letter / "_") (letter / digit / "_")* __
/*
* Modeled after ECMA-262, 5th ed., 7.8.4. (syntax & semantics, rules only
* vaguely).
*/
literal "literal"
= (doubleQuotedString / singleQuotedString) "i"? __
string "string"
= (doubleQuotedString / singleQuotedString) __
doubleQuotedString
= '"' doubleQuotedCharacter* '"'
doubleQuotedCharacter
= simpleDoubleQuotedCharacter
/ simpleEscapeSequence
/ zeroEscapeSequence
/ hexEscapeSequence
/ unicodeEscapeSequence
/ eolEscapeSequence
simpleDoubleQuotedCharacter
= !('"' / "\\" / eolChar) .
singleQuotedString
= "'" singleQuotedCharacter* "'"
singleQuotedCharacter
= simpleSingleQuotedCharacter
/ simpleEscapeSequence
/ zeroEscapeSequence
/ hexEscapeSequence
/ unicodeEscapeSequence
/ eolEscapeSequence
simpleSingleQuotedCharacter
= !("'" / "\\" / eolChar) .
class "character class"
= "[" "^"? (classCharacterRange / classCharacter)* "]" "i"? __
classCharacterRange
= classCharacter "-" classCharacter
classCharacter
= bracketDelimitedCharacter
bracketDelimitedCharacter
= simpleBracketDelimitedCharacter
/ simpleEscapeSequence
/ zeroEscapeSequence
/ hexEscapeSequence
/ unicodeEscapeSequence
/ eolEscapeSequence
simpleBracketDelimitedCharacter
= !("]" / "\\" / eolChar) .
simpleEscapeSequence
= "\\" !(digit / "x" / "u" / eolChar) .
zeroEscapeSequence
= "\\0" !digit
/* Hexadecimal escape: "\x" followed by exactly two hexadecimal digits. */
hexEscapeSequence
= "\\x" hexDigit hexDigit
/* Unicode escape: "\u" followed by exactly four hexadecimal digits. */
unicodeEscapeSequence
= "\\u" hexDigit hexDigit hexDigit hexDigit
eolEscapeSequence
= "\\" eol
digit
= [0-9]
hexDigit
= [0-9a-fA-F]
letter
= lowerCaseLetter
/ upperCaseLetter
lowerCaseLetter
= [a-z]
upperCaseLetter
= [A-Z]
__ = (whitespace / eol / comment)*
/* Modeled after ECMA-262, 5th ed., 7.4. */
comment "comment"
= singleLineComment
/ multiLineComment
singleLineComment
= "//" (!eolChar .)*
multiLineComment
= "/*" (!"*/" .)* "*/"
/* Modeled after ECMA-262, 5th ed., 7.3. */
eol "end of line"
= "\n"
/ "\r\n"
/ "\r"
/ "\u2028"
/ "\u2029"
eolChar
= [\n\r\u2028\u2029]
/* Modeled after ECMA-262, 5th ed., 7.2. */
whitespace "whitespace"
= [ \t\v\f\u00A0\uFEFF\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]
Casiano Rodríguez León