primary
) that
identifies the rule, and a
integer / LEFTPAR additive:additive RIGHTPAR { return additive; } that defines a pattern to match against the input text and possibly contains some JavaScript code that determines what happens when the pattern matches successfully.
integer
rule has a human-readable name).
integer "integer" = NUMBER
=
) and a parsing expression.
integer "integer" = NUMBER
;
) after the parsing expression
is allowed.
{
and }
).
& { predicate }
and ! { predicate }
are called semantic predicates)
options
variable.
[~/srcPLgrado/pegjs_examples(master)]$ cat initializer.js var PEG = require("pegjs"); var grammar = [ ' { ', ' util = require("util"); ', ' ', ' var g = "visible variable"; ', ' console.log("Inside Initializer! options = "+util.inspect(options)); ', ' } ', " start = 'a' { console.log(g); return 1; } ", " / & { console.log('inside predicate: '+g); return true; } 'b' { return 2; }" ]; grammar = grammar.join('\n'); console.log("GRAMMAR:\n"+grammar); var parser = PEG.buildParser(grammar); var r = parser.parse("a", { x: 'hello' }); console.log(r); r = parser.parse("b"); console.log(r); Produces the following output:
[~/srcPLgrado/pegjs_examples(master)]$ node initializer.js GRAMMAR: { util = require("util"); var g = "visible variable"; console.log("Inside Initializer! options = "+util.inspect(options)); } start = 'a' { console.log(g); return 1; } / & { console.log('inside predicate: '+g); return true; } 'b' { return 2; } Inside Initializer! options = { x: 'hello' } visible variable 1 Inside Initializer! options = {} inside predicate: visible variable 2
coffee-pegjs-plugin
You have a safe scope shared between all actions and predicates.
initializer
are added.
delete myObject.property
)
as much as you like.
initializer
.
start = a { @result } a = "a" { @result = "awesome" } And this will correctly return
awesome
if you call parse("a")
.
{ result = "awesome" }
becomes
{ var result; result = "awesome" }
this
and the security to just assign variables for local
use like you are used to when writing CoffeeScript.
[~/srcPLgrado/pegjs_examples(master)]$ cat initializer.coffee PEG = require('pegjs') coffee = require 'pegjs-coffee-plugin' grammar = ''' { util = require("util") @g = "visible variable" console.log("Inside Initializer! options = "+util.inspect(options)) } start = 'a' { console.log(@g); 1 } / & { console.log("inside predicate: '#{@g}''") true } 'b' { 2 } ''' parser = PEG.buildParser(grammar, plugins: [coffee]) r = parser.parse('a', x: 'hello') console.log r r = parser.parse('b') console.log r
[~/srcPLgrado/pegjs_examples(master)]$ coffee initializer.coffee Inside Initializer! options = { x: 'hello' } visible variable 1 Inside Initializer! options = {} inside predicate: 'visible variable'' 2
The parsing expressions of the rules are used to match the input text to the grammar.
There are various types of expressions — matching characters or character classes, indicating optional parts and repetition, etc.
Expressions can also contain references to other rules.
If an expression successfully matches a part of the text when running the generated parser, it produces a match result, which is a JavaScript value.
One special case of parser expression is a parser action — a piece of
JavaScript code inside curly braces ({
and }
) that
takes match
results of some of the preceding expressions and returns a JavaScript
value.
This value is considered the match result of the preceding expression (in other words, the parser action is a match result transformer).
In our arithmetic example, there are many parser actions.
Consider this action:
digits:[0-9]+ { return parseInt(digits.join(""), 10); }
[0-9]+
,
which is an
array of strings containing digits, as its parameter.
"literal" 'literal'Match exact literal string and return it. The string syntax is the same as in JavaScript.
Appending i
right after the literal makes the match case-insensitive:
[~/srcPLgrado/pegjs_examples(master)]$ cat ignorecase.coffee PEG = require('pegjs') coffee = require 'pegjs-coffee-plugin' grammar = ''' start = a:'a'i ''' parser = PEG.buildParser(grammar, plugins: [coffee]) r = parser.parse('A') console.log r parser = PEG.buildParser(grammar, plugins: [coffee]) r = parser.parse('a') console.log r When executed produces:
[~/srcPLgrado/pegjs_examples(master)]$ coffee ignorecase.coffee A a
.Match exactly one character and return it as a string:
[~/srcPLgrado/pegjs_examples(master)]$ cat dot.coffee PEG = require('pegjs') coffee = require 'pegjs-coffee-plugin' grammar = ''' start = a: .. ''' parser = PEG.buildParser(grammar, plugins: [coffee]) r = parser.parse('Ab') console.log r parser = PEG.buildParser(grammar, plugins: [coffee]) r = parser.parse("\n\t") console.log r When executed produces:
[~/srcPLgrado/pegjs_examples(master)]$ coffee dot.coffee [ 'A', 'b' ] [ '\n', '\t' ]
[characters]
[a-z]
means all lowercase letters).
^
inverts the matched set (e.g. [^a-z]
means "all characters but
lowercase letters").
i
right after the right bracket makes the
match case-insensitive.
[~/srcPLgrado/pegjs_examples(master)]$ cat regexp.coffee PEG = require('pegjs') coffee = require 'pegjs-coffee-plugin' grammar = ''' start = a: [aeiou\u2661]i . [^x-z] ''' parser = PEG.buildParser(grammar, plugins: [coffee]) r = parser.parse('Abr') console.log r r = parser.parse('♡br') console.log r [~/srcPLgrado/pegjs_examples(master)]$ coffee regexp.coffee [ 'A', 'b', 'r' ] [ '♡', 'b', 'r' ]
rule
Match a parsing expression of a rule recursively and return its match result.
( expression )
Match a subexpression and return its match result.
expression * Match zero or more repetitions of the expression and return their match results in an array. The matching is greedy, i.e. the parser tries to match the expression as many times as possible.
expression + Match one or more repetitions of the expression and return their match results in an array. The matching is greedy, i.e. the parser tries to match the expression as many times as possible.
expression ? Try to match the expression. If the match succeeds, return its match result, otherwise return
null
.
& expression
Try to match the expression.
If the match succeeds, just return undefined
and do not advance
the parser position, otherwise consider the match failed.
! expression
Try to match the expression. If the match does not succeed, just return
undefined
and do not advance the parser position, otherwise consider
the match failed.
[~/srcPLgrado/pegjs/examples(master)]$ cat notpredicate.pegjs __ = (whitespace / eol / comment)* /* Modeled after ECMA-262, 5th ed., 7.4. */ comment "comment" = singleLineComment / multiLineComment singleLineComment = "//" (!eolChar .)* { return text(); } multiLineComment = "/*" (!"*/" .)* "*/" { return text(); } /* Modeled after ECMA-262, 5th ed., 7.3. */ eol "end of line" = "\n" / "\r\n" / "\r" / "\u2028" / "\u2029" eolChar = [\n\r\u2028\u2029] whitespace "whitespace" = [ \t\v\f\u00A0\uFEFF\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]
[~/srcPLgrado/pegjs/examples(master)]$ cat mainnotpredicate.js var PEG = require("./notpredicate.js"); var input = process.argv[2] || "// one comment\n"+ "// another comment \t/\n"+ "/* a\n"+ " third comment */"; console.log("\n*****\n"+input+"\n*****\n"); var r = PEG.parse(input); console.log(r);
[~/srcPLgrado/pegjs/examples(master)]$ pegjs notpredicate.pegjs [~/srcPLgrado/pegjs/examples(master)]$ node mainnotpredicate.js ***** // one comment // another comment / /* a third comment */ ***** [ '// one comment', '\n', '// another comment \t/', '\n', '/* a\n third comment */' ]
& { predicate }
return
statement.
true
in boolean context, just return undefined
and do not advance the
parser position; otherwise consider the match failed.
offset
function.
The offset
function returns a zero-based character index into the input string.
line
and column
functions.
Both return one-based indexes.
options
variable.
[~/srcPLgrado/pegjs_examples(master)]$ cat semantic_predicate.coffee PEG = require('pegjs') coffee = require 'pegjs-coffee-plugin' grammar = ''' { @util = require("util") @g = "visible variable" console.log("Inside Initializer! options = "+@util.inspect(options)) } start = 'a' { console.log(@g); 1 } / c:'c' '\\n' & { console.log("inside predicate: @g = '#{@g}' c = '#{c}'") console.log("options = #{@util.inspect(options)}") console.log("offset = #{offset()}") console.log("line = #{line()}") console.log("column = #{column()}") true } 'b' { 2 } ''' parser = PEG.buildParser(grammar, plugins: [coffee]) r = parser.parse('a', x: 'hello') console.log r r = parser.parse("c\nb", y : 'world') console.log r When executed produces the following output:
[~/srcPLgrado/pegjs_examples(master)]$ coffee semantic_predicate.coffee Inside Initializer! options = { x: 'hello' } visible variable 1 Inside Initializer! options = { y: 'world' } inside predicate: @g = 'visible variable' c = 'c' options = { y: 'world' } offset = 2 line = 2 column = 1 2
! { predicate }
return
statement.
false
in boolean context, just return undefined
and do not advance the
parser position; otherwise consider the match failed.
offset
function.
The offset
function returns a zero-based character index into the input string.
line
and column
functions.
Both return one-based indexes.
options
variable.
$ expression Try to match the expression. If the match succeeds, return the matched string instead of the match result.
label : expression
label
.
expression1 expression2 ... expressionn
Match a sequence of expressions and return their match results in an array.
expression { action }
action
,
otherwise consider the match failed.
action
is a piece of JavaScript code that is executed as if it was
inside a function.
return
statement.
expected
function, which makes the parser throw an exception.
The function takes
one parameter — a description
of what was expected at the current
position. This description
will be used as part of a message of the
thrown exception.
error
function, which also
makes the parser throw an exception. The function takes one parameter
— an error
message. This message will be used by the thrown exception.
text
function.
offset
function.
It returns a zero-based character index into the input string.
line
and column
functions. Both return one-based indexes.
options
variable.
expression1 / expression2 / ... / expressionn Try to match the first expression, if it does not succeed, try the second one, etc. Return the match result of the first successfully matched expression. If no expression matches, consider the match failed.
[~/srcPLgrado/pegjs(master)]$ cat src/parser.pegjs grammar = __ initializer? rule+ initializer = action semicolon? rule = identifier string? equals expression semicolon? expression = choice choice = sequence (slash sequence)* sequence = labeled* action / labeled* labeled = identifier colon prefixed / prefixed prefixed = dollar suffixed / and action / and suffixed / not action / not suffixed / suffixed suffixed = primary question / primary star / primary plus / primary primary = identifier !(string? equals) / literal / class / dot / lparen expression rparen /* "Lexical" elements */ action "action" = braced __ braced = "{" (braced / nonBraceCharacters)* "}" nonBraceCharacters = nonBraceCharacter+ nonBraceCharacter = [^{}] equals = "=" __ colon = ":" __ semicolon = ";" __ slash = "/" __ and = "&" __ not = "!" __ dollar = "$" __ question = "?" __ star = "*" __ plus = "+" __ lparen = "(" __ rparen = ")" __ dot = "." __ /* * Modeled after ECMA-262, 5th ed., 7.6, but much simplified: * * * no Unicode escape sequences * * * "Unicode combining marks" and "Unicode connection punctuation" can't be * part of the identifier * * * only [a-zA-Z] is considered a "Unicode letter" * * * only [0-9] is considered a "Unicode digit" * * The simplifications were made just to make the implementation little bit * easier, there is no "philosophical" reason behind them. * * Contrary to ECMA 262, the "$" character is not valid because it serves other * purpose in the grammar. */ identifier "identifier" = (letter / "_") (letter / digit / "_")* __ /* * Modeled after ECMA-262, 5th ed., 7.8.4. (syntax & semantics, rules only * vaguely). */ literal "literal" = (doubleQuotedString / singleQuotedString) "i"? 
__ string "string" = (doubleQuotedString / singleQuotedString) __ doubleQuotedString = '"' doubleQuotedCharacter* '"' doubleQuotedCharacter = simpleDoubleQuotedCharacter / simpleEscapeSequence / zeroEscapeSequence / hexEscapeSequence / unicodeEscapeSequence / eolEscapeSequence simpleDoubleQuotedCharacter = !('"' / "\\" / eolChar) . singleQuotedString = "'" singleQuotedCharacter* "'" singleQuotedCharacter = simpleSingleQuotedCharacter / simpleEscapeSequence / zeroEscapeSequence / hexEscapeSequence / unicodeEscapeSequence / eolEscapeSequence simpleSingleQuotedCharacter = !("'" / "\\" / eolChar) . class "character class" = "[" "^"? (classCharacterRange / classCharacter)* "]" "i"? __ classCharacterRange = classCharacter "-" classCharacter classCharacter = bracketDelimitedCharacter bracketDelimitedCharacter = simpleBracketDelimitedCharacter / simpleEscapeSequence / zeroEscapeSequence / hexEscapeSequence / unicodeEscapeSequence / eolEscapeSequence simpleBracketDelimitedCharacter = !("]" / "\\" / eolChar) . simpleEscapeSequence = "\\" !(digit / "x" / "u" / eolChar) . zeroEscapeSequence = "\\0" !digit hexEscapeSequence = "\\x" hexDigit hexDigit) unicodeEscapeSequence = "\\u" hexDigit hexDigit hexDigit hexDigit) eolEscapeSequence = "\\" eol digit = [0-9] hexDigit = [0-9a-fA-F] letter = lowerCaseLetter / upperCaseLetter lowerCaseLetter = [a-z] upperCaseLetter = [A-Z] __ = (whitespace / eol / comment)* /* Modeled after ECMA-262, 5th ed., 7.4. */ comment "comment" = singleLineComment / multiLineComment singleLineComment = "//" (!eolChar .)* multiLineComment = "/*" (!"*/" .)* "*/" /* Modeled after ECMA-262, 5th ed., 7.3. */ eol "end of line" = "\n" / "\r\n" / "\r" / "\u2028" / "\u2029" eolChar = [\n\r\u2028\u2029] /* Modeled after ECMA-262, 5th ed., 7.2. */ whitespace "whitespace" = [ \t\v\f\u00A0\uFEFF\u1680\u180E\u2000-\u200A\u202F\u205F\u3000]
Casiano Rodríguez León