Created
September 20, 2012 13:57
-
-
Save ansoncat/3756084 to your computer and use it in GitHub Desktop.
Simple grammar in PEG.js
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Grammar for simple boolean filter expressions, for something like
 *   A == 2 AND (B > 3 OR C >= 4 )
 *
 * Every rule returns a normalized string; parenthesized groups are
 * re-emitted as "( ... )". "AND" binds tighter than "OR", and both
 * operators are right-recursive.
 */
start
  = additive

// OR-chain: lowest precedence.
additive
  = left:multiplicative _ "OR" _ right:additive { return left + " OR " + right; }
  / multiplicative

// AND-chain: binds tighter than OR.
multiplicative
  = left:primary _ "AND" _ right:multiplicative { return left + " AND " + right; }
  / primary

// A single comparison or a parenthesized sub-expression.
primary
  = rule
  / _ "(" _ additive:additive _ ")" _ { return "( " + additive + " )"; }

// <symbol> <relation> <integer>
rule
  = left:symbol _ relation:relation _ right:integer { return left + " " + relation + " " + right; }

// One of ">", "<", ">=", "<=", "==".
// `fix || ""` keeps the action correct whether an unmatched optional
// yields "" or null.
relation
  = op:[><] fix:[=]? { return op + (fix || ""); }
  / "=="

integer "integer"
  = _ digits:[0-9]+ _ { return parseInt(digits.join(""), 10); }

// ASCII letters only. The previous class [a-zA-z] also accepted the
// characters between "Z" and "a": [ \ ] ^ _ and the backtick.
symbol
  = _ alphas:[a-zA-Z]+ _ { return alphas.join(""); }

_ "whitespace"
  = [ \t\r\n]*
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Sample expression fed to parse() by the driver code at the end of this
// script (note the trailing space inside the literal).
var input = 'A < 2 AND (B > 3 OR C >= 4) ';
/**
 * Hand-written recursive-descent parser for expressions such as
 * `A == 2 AND (B > 3 OR C >= 4)`.
 *
 * Grammar implemented (AND binds tighter than OR, both right-recursive):
 *   OR      := AND ("OR" OR)?
 *   AND     := PRIMARY ("AND" AND)?
 *   PRIMARY := "(" OR ")" | RULE
 *   RULE    := letters relation digits      relation := > < >= <= ==
 *
 * Each successfully reduced production is also written to the console as a
 * trace, followed by the final read position.
 *
 * @param {string} input  Text to parse.
 * @returns {?string} The normalized expression, or null when the input does
 *     not match the grammar or is not consumed completely.
 */
function parse(input) {
  var count = 0; // index of the next unconsumed character of `input`
  var result;

  /**
   * Tries `reg` against the unconsumed tail of the input. On success the
   * read position advances past the match and the matched text is returned;
   * otherwise null is returned and the position is untouched.
   * Callers must anchor `reg` with ^ so it only matches at the position.
   */
  function match(reg) {
    var found = input.slice(count).match(reg);
    if (found === null) {
      return null;
    }
    count += found[0].length;
    return found[0];
  }

  /** Skips any run of spaces, tabs and line breaks. */
  function eatWS() {
    match(/^[ \t\r\n]*/);
  }

  /** RULE := symbol relation integer, e.g. "A >= 4". */
  function RULE() {
    var org = count;
    var left, rel, right;

    eatWS();
    // ASCII letters only; the former class [a-zA-z] also let through
    // "[", "\", "]", "^", "_" and "`".
    left = match(/^[a-zA-Z]+/);
    if (left === null) {
      count = org;
      return null;
    }
    eatWS();
    // The alternation is grouped so that ^ applies to "==" as well.
    // Previously "==" was unanchored and could match further ahead in the
    // input, silently skipping the characters in between.
    rel = match(/^(?:[><]=?|==)/);
    if (rel === null) {
      count = org;
      return null;
    }
    eatWS();
    right = match(/^[0-9]+/);
    if (right === null) {
      count = org;
      return null;
    }
    right = parseInt(right, 10);
    console.log(left + " " + rel + " " + right);
    return left + " " + rel + " " + right;
  }

  /** PRIMARY := "(" OR ")" | RULE. */
  function PRIMARY() {
    var org = count;
    var inner;

    eatWS();
    if (match(/^\(/) === null) {
      return RULE();
    }
    eatWS();
    inner = OR();
    if (inner === null) {
      count = org;
      return null;
    }
    eatWS();
    if (match(/^\)/) === null) {
      // Missing closing parenthesis: backtrack. (An assignment to an
      // undeclared `errorcount` used to sit here; it was never read and
      // threw a ReferenceError in strict mode.)
      count = org;
      return null;
    }
    eatWS();
    console.log("( " + inner + " )");
    return "( " + inner + " )";
  }

  /** AND := PRIMARY ("AND" AND)? */
  function AND() {
    var org = count;
    var left = PRIMARY();
    var right;

    if (left === null) {
      count = org;
      return null;
    }
    eatWS();
    if (match(/^AND/) === null) {
      return left;
    }
    eatWS();
    right = AND();
    if (right === null) {
      count = org;
      return null;
    }
    console.log(left + " AND " + right);
    return left + " AND " + right;
  }

  /** OR := AND ("OR" OR)? */
  function OR() {
    var org = count;
    var left = AND();
    var right;

    if (left === null) {
      count = org;
      return null;
    }
    eatWS();
    if (match(/^OR/) === null) {
      return left;
    }
    eatWS();
    right = OR();
    if (right === null) {
      count = org;
      return null;
    }
    console.log(left + " OR " + right);
    return left + " OR " + right;
  }

  result = OR();
  console.log(count);
  // Leftover input means the expression as a whole is invalid.
  if (count !== input.length) {
    result = null;
  }
  return result;
}
// Driver: parse the sample input and print either the normalized
// expression or an error marker below a separator line.
var r = parse(input);
console.log('--------------------------------------------');
console.log(r != null ? r : 'Parsing error');
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module.exports = (function(){ | |
/* | |
* Generated by PEG.js 0.7.0. | |
* | |
* http://pegjs.majda.cz/ | |
*/ | |
/**
 * Returns `s` as the source text of a double-quoted JavaScript string
 * literal.
 *
 * @param {string} s  Text to quote.
 * @returns {string} The quoted, escaped literal.
 */
function quote(s) {
  /*
   * ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a
   * string literal except for the closing quote character, backslash,
   * carriage return, line separator, paragraph separator, and line feed.
   * Any character may appear in the form of an escape sequence.
   *
   * For portability, we also escape all control and non-ASCII
   * characters. Note that "\0" and "\v" escape sequences are not used
   * because JSHint does not like the first and IE the second.
   */

  /*
   * Renders one character as \xHH (code <= 0xFF) or \uHHHH. This is kept
   * local on purpose: a bare `escape` at this scope resolves to the legacy
   * global escape() function, which produces %HH / %uHHHH instead.
   */
  function escapeChar(ch) {
    var code = ch.charCodeAt(0);
    var isByte = code <= 0xFF;
    var hex = code.toString(16).toUpperCase();
    var width = isByte ? 2 : 4;
    while (hex.length < width) {
      hex = '0' + hex;
    }
    return '\\' + (isByte ? 'x' : 'u') + hex;
  }

  return '"' + s
    .replace(/\\/g, '\\\\')  // backslash
    .replace(/"/g, '\\"')    // closing quote character
    .replace(/\x08/g, '\\b') // backspace
    .replace(/\t/g, '\\t')   // horizontal tab
    .replace(/\n/g, '\\n')   // line feed
    .replace(/\f/g, '\\f')   // form feed
    .replace(/\r/g, '\\r')   // carriage return
    .replace(/[\x00-\x07\x0B\x0E-\x1F\x80-\uFFFF]/g, escapeChar)
    + '"';
}
var result = { | |
/* | |
* Parses the input with a generated parser. If the parsing is successful,
* returns a value explicitly or implicitly specified by the grammar from | |
* which the parser was generated (see |PEG.buildParser|). If the parsing is | |
* unsuccessful, throws |PEG.parser.SyntaxError| describing the error. | |
*/ | |
parse: function(input, startRule) { | |
var parseFunctions = { | |
"additive": parse_additive, | |
"multiplicative": parse_multiplicative, | |
"primary": parse_primary, | |
"rule": parse_rule, | |
"relation": parse_relation, | |
"integer": parse_integer, | |
"symbol": parse_symbol, | |
"_": parse__ | |
}; | |
if (startRule !== undefined) { | |
if (parseFunctions[startRule] === undefined) { | |
throw new Error("Invalid rule name: " + quote(startRule) + "."); | |
} | |
} else { | |
startRule = "additive"; | |
} | |
var pos = 0; | |
var reportFailures = 0; | |
var rightmostFailuresPos = 0; | |
var rightmostFailuresExpected = []; | |
/**
 * Left-pads a string by prepending `padding` once for every character that
 * `input` falls short of `length`.
 *
 * @param {string} input    Text to pad (shadows the parser's input inside
 *                          this helper only).
 * @param {string} padding  Text prepended per missing character.
 * @param {number} length   Desired minimum length.
 * @returns {string} The padded text; `input` itself when already long enough.
 */
function padLeft(input, padding, length) {
  var padded = input;
  var missing = length - input.length;
  while (missing > 0) {
    padded = padding + padded;
    missing--;
  }
  return padded;
}
/**
 * Renders a character as a JavaScript escape sequence: \xHH when its code
 * unit is at most 0xFF, \uHHHH otherwise (upper-case hex, zero-padded).
 *
 * NOTE(review): quote() is declared outside this scope, so its `escape`
 * reference does not reach this function; no other call site is visible
 * in this file.
 *
 * @param {string} ch  Text whose first code unit is escaped.
 * @returns {string} The escape sequence.
 */
function escape(ch) {
  var charCode = ch.charCodeAt(0);
  var isByte = charCode <= 0xFF;
  var hex = charCode.toString(16).toUpperCase();
  return '\\' + (isByte ? 'x' : 'u') + padLeft(hex, '0', isByte ? 2 : 4);
}
/**
 * Records an expected-token description for error reporting. Only failures
 * at the rightmost failure position seen so far are kept: a failure left of
 * it is ignored, and one right of it discards the ones collected earlier.
 *
 * @param {string} failure  Description of what was expected at `pos`.
 */
function matchFailed(failure) {
  if (pos < rightmostFailuresPos) {
    return;
  }
  if (pos > rightmostFailuresPos) {
    // New rightmost position: start a fresh list.
    rightmostFailuresPos = pos;
    rightmostFailuresExpected = [];
  }
  rightmostFailuresExpected.push(failure);
}
/**
 * additive = left:multiplicative _ "OR" _ right:additive / multiplicative
 *
 * @returns {?string} `left + " OR " + right` for the first alternative,
 *     the multiplicative result for the second, or null with `pos` restored.
 */
function parse_additive() {
  var start = pos;
  var matched = false;
  var left, right;

  // First alternative: multiplicative _ "OR" _ additive
  left = parse_multiplicative();
  if (left !== null && parse__() !== null) {
    if (input.substr(pos, 2) === "OR") {
      pos += 2;
      if (parse__() !== null) {
        right = parse_additive();
        matched = right !== null;
      }
    } else if (reportFailures === 0) {
      matchFailed("\"OR\"");
    }
  }
  if (matched) {
    return left + " OR " + right;
  }

  // Second alternative: rewind and parse a lone multiplicative. The left
  // operand is parsed again here, exactly as the generator emitted it.
  pos = start;
  return parse_multiplicative();
}
/**
 * multiplicative = left:primary _ "AND" _ right:multiplicative / primary
 *
 * @returns {?string} `left + " AND " + right` for the first alternative,
 *     the primary result for the second, or null with `pos` restored.
 */
function parse_multiplicative() {
  var start = pos;
  var matched = false;
  var left, right;

  // First alternative: primary _ "AND" _ multiplicative
  left = parse_primary();
  if (left !== null && parse__() !== null) {
    if (input.substr(pos, 3) === "AND") {
      pos += 3;
      if (parse__() !== null) {
        right = parse_multiplicative();
        matched = right !== null;
      }
    } else if (reportFailures === 0) {
      matchFailed("\"AND\"");
    }
  }
  if (matched) {
    return left + " AND " + right;
  }

  // Second alternative: rewind and parse a lone primary. The left operand
  // is parsed again here, exactly as the generator emitted it.
  pos = start;
  return parse_primary();
}
/**
 * primary = rule / _ "(" _ additive _ ")" _
 *
 * @returns {?string} The rule text, or "( " + additive + " )" for a
 *     parenthesized group, or null with `pos` restored.
 */
function parse_primary() {
  var start, inner;

  // First alternative: a plain comparison.
  var simple = parse_rule();
  if (simple !== null) {
    return simple;
  }

  // Second alternative: parenthesized sub-expression.
  start = pos;
  if (parse__() !== null) {
    if (input.charCodeAt(pos) === 40) { // "("
      pos++;
      if (parse__() !== null) {
        inner = parse_additive();
        if (inner !== null && parse__() !== null) {
          if (input.charCodeAt(pos) === 41) { // ")"
            pos++;
            if (parse__() !== null) {
              return "( " + inner + " )";
            }
          } else if (reportFailures === 0) {
            matchFailed("\")\"");
          }
        }
      }
    } else if (reportFailures === 0) {
      matchFailed("\"(\"");
    }
  }

  pos = start;
  return null;
}
/**
 * rule = left:symbol _ relation:relation _ right:integer
 *
 * @returns {?string} `left + " " + relation + " " + right`, or null with
 *     `pos` restored when any part of the sequence fails.
 */
function parse_rule() {
  var start = pos;
  var left, relation, right;

  left = parse_symbol();
  if (left !== null && parse__() !== null) {
    relation = parse_relation();
    if (relation !== null && parse__() !== null) {
      right = parse_integer();
      if (right !== null) {
        return left + " " + relation + " " + right;
      }
    }
  }

  pos = start;
  return null;
}
/**
 * relation = op:[><] fix:[=]? / "=="
 *
 * @returns {?string} One of ">", "<", ">=", "<=", "==", or null (with `pos`
 *     unchanged) when no relation starts at `pos`.
 */
function parse_relation() {
  var op = input.charAt(pos);
  var fix;

  // First alternative: ">" or "<", optionally followed by "=".
  if (/^[><]/.test(op)) {
    pos++;
    fix = input.charAt(pos);
    if (/^[=]/.test(fix)) {
      pos++;
    } else {
      // The "=" is optional; its absence is still reported as an
      // expectation for error messages.
      fix = "";
      if (reportFailures === 0) {
        matchFailed("[=]");
      }
    }
    return op + fix;
  }
  if (reportFailures === 0) {
    matchFailed("[><]");
  }

  // Second alternative: the literal "==".
  if (input.substr(pos, 2) === "==") {
    pos += 2;
    return "==";
  }
  if (reportFailures === 0) {
    matchFailed("\"==\"");
  }
  return null;
}
/**
 * integer "integer" = _ digits:[0-9]+ _
 *
 * While this rule runs, `reportFailures` is raised so that inner failures
 * are not recorded; a failure of the whole rule is reported under the
 * rule's display name "integer" instead.
 *
 * @returns {?number} The digits parsed as a base-10 number, or null with
 *     `pos` restored.
 */
function parse_integer() {
  var start = pos;
  var digits = [];
  var value = null;
  var ch;

  reportFailures++;
  if (parse__() !== null) {
    // Collect one or more digits.
    for (;;) {
      ch = input.charAt(pos);
      if (!/^[0-9]/.test(ch)) {
        if (reportFailures === 0) {
          matchFailed("[0-9]");
        }
        break;
      }
      digits.push(ch);
      pos++;
    }
    if (digits.length > 0 && parse__() !== null) {
      value = parseInt(digits.join(""), 10);
    }
  }
  if (value === null) {
    pos = start;
  }
  reportFailures--;
  if (reportFailures === 0 && value === null) {
    matchFailed("integer");
  }
  return value;
}
/**
 * symbol = _ alphas:[a-zA-Z]+ _
 *
 * The character class is ASCII letters only. The generated code used
 * [a-zA-z], whose "A-z" range also accepts "[", "\", "]", "^", "_" and "`";
 * the grammar this parser is generated from should carry the same
 * correction.
 *
 * @returns {?string} The matched letters joined into one string, or null
 *     with `pos` restored.
 */
function parse_symbol() {
  var start = pos;
  var letters = [];
  var ch;

  if (parse__() !== null) {
    // Collect one or more letters.
    for (;;) {
      ch = input.charAt(pos);
      if (!/^[a-zA-Z]/.test(ch)) {
        if (reportFailures === 0) {
          matchFailed("[a-zA-Z]");
        }
        break;
      }
      letters.push(ch);
      pos++;
    }
    if (letters.length > 0 && parse__() !== null) {
      return letters.join("");
    }
  }

  pos = start;
  return null;
}
/**
 * _ "whitespace" = [ \t\r\n]*
 *
 * Consumes any run of spaces, tabs, carriage returns and line feeds.
 * Because the rule is zero-or-more it always succeeds, so the
 * named-rule failure report ("whitespace") can never fire and is omitted.
 *
 * @returns {string[]} The consumed whitespace characters (possibly empty).
 */
function parse__() {
  var spaces = [];
  var ch;

  reportFailures++;
  for (;;) {
    ch = input.charAt(pos);
    if (!/^[ \t\r\n]/.test(ch)) {
      if (reportFailures === 0) {
        matchFailed("[ \\t\\r\\n]");
      }
      break;
    }
    spaces.push(ch);
    pos++;
  }
  reportFailures--;
  return spaces;
}
/**
 * Sorts the expectation list in place and returns a new array with
 * consecutive duplicates removed.
 *
 * @param {string[]} expected  Expectation descriptions; reordered by the sort.
 * @returns {string[]} Sorted descriptions without repeats.
 */
function cleanupExpected(expected) {
  var cleanExpected = [];
  var lastExpected = null;
  var i, current;

  expected.sort();
  for (i = 0; i < expected.length; i++) {
    current = expected[i];
    if (current === lastExpected) {
      continue;
    }
    cleanExpected.push(current);
    lastExpected = current;
  }
  return cleanExpected;
}
/**
 * Computes the 1-based line and column of the error position, taken as the
 * larger of `pos` and `rightmostFailuresPos`.
 *
 * "\n", "\r", "\u2028" and "\u2029" each end a line; a "\n" directly after
 * a "\r" does not count as a second line break.
 *
 * @returns {{line: number, column: number}}
 */
function computeErrorPosition() {
  /*
   * The first idea was to use |String.split| to break the input up to the
   * error position along newlines and derive the line and column from
   * there. However IE's |split| implementation is so broken that it was
   * enough to prevent it.
   */
  var end = Math.max(pos, rightmostFailuresPos);
  var line = 1;
  var column = 1;
  var seenCR = false;
  var i, ch;

  for (i = 0; i < end; i++) {
    ch = input.charAt(i);
    if (ch === "\n") {
      // The "\r" of a "\r\n" pair has already advanced the line.
      if (!seenCR) { line++; }
      column = 1;
      seenCR = false;
    } else if (ch === "\r" || ch === "\u2028" || ch === "\u2029") {
      line++;
      column = 1;
      seenCR = true;
    } else {
      column++;
      seenCR = false;
    }
  }
  return { line: line, column: column };
}
var result = parseFunctions[startRule](); | |
/* | |
* The parser is now in one of the following three states: | |
* | |
* 1. The parser successfully parsed the whole input. | |
* | |
* - |result !== null| | |
* - |pos === input.length| | |
* - |rightmostFailuresExpected| may or may not contain something | |
* | |
* 2. The parser successfully parsed only a part of the input. | |
* | |
* - |result !== null| | |
* - |pos < input.length| | |
* - |rightmostFailuresExpected| may or may not contain something | |
* | |
* 3. The parser did not successfully parse any part of the input. | |
* | |
* - |result === null| | |
* - |pos === 0| | |
* - |rightmostFailuresExpected| contains at least one failure | |
* | |
* All code following this comment (including called functions) must | |
* handle these states. | |
*/ | |
if (result === null || pos !== input.length) { | |
var offset = Math.max(pos, rightmostFailuresPos); | |
var found = offset < input.length ? input.charAt(offset) : null; | |
var errorPosition = computeErrorPosition(); | |
throw new this.SyntaxError( | |
cleanupExpected(rightmostFailuresExpected), | |
found, | |
offset, | |
errorPosition.line, | |
errorPosition.column | |
); | |
} | |
return result; | |
}, | |
/* Returns the parser source code. */ | |
toSource: function() { return this._source; } | |
}; | |
/* Thrown when a parser encounters a syntax error. */ | |
/*
 * Constructor for the parser's syntax errors.
 *
 *   expected - array of expectation descriptions
 *   found    - the offending character, or null at end of input
 *   offset   - index of the error in the input
 *   line     - line number of the error
 *   column   - column number of the error
 *
 * All five values are stored on the instance, together with |name| and a
 * human-readable |message|.
 */
result.SyntaxError = function(expected, found, offset, line, column) {
  /* Builds "Expected <a, b or c> but <found> found." */
  function buildMessage(expected, found) {
    var foundHumanized = found ? quote(found) : "end of input";
    var expectedHumanized;

    if (expected.length === 0) {
      expectedHumanized = "end of input";
    } else if (expected.length === 1) {
      expectedHumanized = expected[0];
    } else {
      expectedHumanized = expected.slice(0, expected.length - 1).join(", ")
        + " or "
        + expected[expected.length - 1];
    }

    return "Expected " + expectedHumanized + " but " + foundHumanized + " found.";
  }

  this.name = "SyntaxError";
  this.expected = expected;
  this.found = found;
  this.message = buildMessage(expected, found);
  this.offset = offset;
  this.line = line;
  this.column = column;
};
result.SyntaxError.prototype = Error.prototype; | |
return result; | |
})(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment