Skip to content

Instantly share code, notes, and snippets.

@jdrew1303
Forked from ansoncat/1-rule.pegjs
Created November 26, 2017 03:41
Show Gist options
  • Save jdrew1303/54354400656bd9c9d93b174efc9844a5 to your computer and use it in GitHub Desktop.
Save jdrew1303/54354400656bd9c9d93b174efc9844a5 to your computer and use it in GitHub Desktop.
Simple grammar in PEG.js
/*
 * Grammar for boolean rule expressions, for something like
 *   A == 2 AND (B > 3 OR C >= 4 )
 *
 * "AND" binds tighter than "OR"; both operators are right-recursive.
 * Every rule returns the normalized text of what it matched.
 */

start
  = additive

// OR level (lowest precedence).
additive
  = left:multiplicative _ "OR" _ right:additive { return left + " OR " + right; }
  / multiplicative

// AND level.
multiplicative
  = left:primary _ "AND" _ right:multiplicative { return left + " AND " + right; }
  / primary

// A single comparison, or a parenthesized sub-expression.
primary
  = rule
  / _ "(" _ additive:additive _ ")" _ { return "( " + additive + " )"; }

// symbol <relation> integer, e.g. "A >= 4".
rule
  = left:symbol _ relation:relation _ right:integer { return left + " " + relation + " " + right; }

// One of: >  <  >=  <=  ==
relation
  = op:[><] fix:[=]? { return op + fix; }
  / "=="

integer "integer"
  = _ digits:[0-9]+ _ { return parseInt(digits.join(""), 10); }

// ASCII letters only. The class used to be [a-zA-z]; the "A-z" range also
// covers the punctuation between "Z" and "a" ([ \ ] ^ _ `), which let
// names such as "A_b" through.
symbol
  = _ alphas:[a-zA-Z]+ _ { return alphas.join(""); }

_ "whitespace"
  = [ \t\r\n]*
// Sample expression for the hand-written parser; the trailing space is intentional.
var input = "A < 2 AND (B > 3 OR C >= 4) ";
/**
 * Hand-written recursive-descent parser for rule expressions such as
 * `A == 2 AND (B > 3 OR C >= 4)`.
 *
 * Grammar implemented by the nested functions:
 *   OR      := AND ( "OR" OR )?
 *   AND     := PRIMARY ( "AND" AND )?
 *   PRIMARY := "(" OR ")" | RULE
 *   RULE    := letters relation digits      (relation: > < >= <= ==)
 *
 * Every successfully reduced sub-expression is traced with console.log, and
 * the final cursor position is logged before returning.
 *
 * @param {string} input - Expression text to parse.
 * @returns {?string} The normalized expression text, or null when the input
 *   does not match the grammar or is not consumed completely.
 */
function parse(input) {
  // Cursor into `input`; only match() advances it, failed rules rewind it.
  var count = 0, result = null;

  /** Parses `AND ("OR" OR)?`; rewinds and returns null on failure. */
  function OR() {
    var org = count, left = AND(), right;
    if (left === null) {
      count = org;
      return null;
    }
    eatWS();
    // No "OR" keyword: the AND-level expression stands on its own.
    if (match(/^OR/) === null) {
      return left;
    }
    eatWS();
    right = OR();
    if (right === null) {
      count = org;
      return null;
    }
    console.log(left + " OR " + right);
    return left + " OR " + right;
  }

  /** Parses `PRIMARY ("AND" AND)?`; rewinds and returns null on failure. */
  function AND() {
    var org = count, left = PRIMARY(), right;
    if (left === null) {
      count = org;
      return null;
    }
    eatWS();
    // No "AND" keyword: the primary stands on its own.
    if (match(/^AND/) === null) {
      return left;
    }
    eatWS();
    right = AND();
    if (right === null) {
      count = org;
      return null;
    }
    console.log(left + " AND " + right);
    return left + " AND " + right;
  }

  /** Parses a parenthesized OR-expression, or falls back to a plain RULE. */
  function PRIMARY() {
    var org = count, inner;
    eatWS();
    if (match(/^\(/) === null) {
      return RULE();
    }
    eatWS();
    inner = OR();
    if (inner === null) {
      count = org;
      return null;
    }
    eatWS();
    if (match(/^\)/) === null) {
      // Missing ")": backtrack. An earlier revision also stored the position
      // in an undeclared `errorcount`, which leaked a global and threw a
      // ReferenceError under strict mode; the value was never read.
      count = org;
      return null;
    }
    eatWS();
    console.log("( " + inner + " )");
    return "( " + inner + " )";
  }

  /** Parses one comparison: letters, a relation operator, then digits. */
  function RULE() {
    var org = count, left, rel, right;
    eatWS();
    // ASCII letters only ("A-z" would also admit [ \ ] ^ _ and backtick).
    left = match(/^[a-zA-Z]+/);
    if (left === null) {
      count = org;
      return null;
    }
    eatWS();
    // The group keeps BOTH alternatives anchored at the cursor; without it
    // "==" could be found anywhere later in the input.
    rel = match(/^(?:[><]=?|==)/);
    if (rel === null) {
      count = org;
      return null;
    }
    eatWS();
    right = match(/^[0-9]+/);
    if (right === null) {
      count = org;
      return null;
    }
    console.log(left + " " + rel + " " + parseInt(right, 10));
    return left + " " + rel + " " + parseInt(right, 10);
  }

  /**
   * Tries `reg` against the unread remainder of the input.
   * On success advances the cursor past the match and returns the matched
   * text; otherwise returns null and leaves the cursor alone.
   */
  function match(reg) {
    var tmp = input.slice(count).match(reg), text;
    if (tmp === null) {
      return null;
    }
    text = tmp[0];
    count += text.length;
    return text;
  }

  /** Skips any run of spaces, tabs, CRs and LFs at the cursor. */
  function eatWS() {
    match(/^[ \t\r\n]*/);
  }

  result = OR();
  console.log(count);
  // A parse only counts when the whole input was consumed.
  if (count !== input.length) {
    result = null;
  }
  return result;
}
// Run the hand-written parser on the sample input, then print either the
// normalized expression or an error marker below a separator line.
var r = parse(input);
console.log('--------------------------------------------');
console.log(r != null ? r : 'Parsing error');
module.exports = (function(){
/*
* Generated by PEG.js 0.7.0.
*
* http://pegjs.majda.cz/
*/
function quote(s) {
/*
* ECMA-262, 5th ed., 7.8.4: All characters may appear literally in a
* string literal except for the closing quote character, backslash,
* carriage return, line separator, paragraph separator, and line feed.
* Any character may appear in the form of an escape sequence.
*
* For portability, we also escape escape all control and non-ASCII
* characters. Note that "\0" and "\v" escape sequences are not used
* because JSHint does not like the first and IE the second.
*/
return '"' + s
.replace(/\\/g, '\\\\') // backslash
.replace(/"/g, '\\"') // closing quote character
.replace(/\x08/g, '\\b') // backspace
.replace(/\t/g, '\\t') // horizontal tab
.replace(/\n/g, '\\n') // line feed
.replace(/\f/g, '\\f') // form feed
.replace(/\r/g, '\\r') // carriage return
.replace(/[\x00-\x07\x0B\x0E-\x1F\x80-\uFFFF]/g, escape)
+ '"';
}
var result = {
/*
* Parses the input with a generated parser. If the parsing is successfull,
* returns a value explicitly or implicitly specified by the grammar from
* which the parser was generated (see |PEG.buildParser|). If the parsing is
* unsuccessful, throws |PEG.parser.SyntaxError| describing the error.
*/
parse: function(input, startRule) {
var parseFunctions = {
"additive": parse_additive,
"multiplicative": parse_multiplicative,
"primary": parse_primary,
"rule": parse_rule,
"relation": parse_relation,
"integer": parse_integer,
"symbol": parse_symbol,
"_": parse__
};
if (startRule !== undefined) {
if (parseFunctions[startRule] === undefined) {
throw new Error("Invalid rule name: " + quote(startRule) + ".");
}
} else {
startRule = "additive";
}
var pos = 0;
var reportFailures = 0;
var rightmostFailuresPos = 0;
var rightmostFailuresExpected = [];
function padLeft(input, padding, length) {
var result = input;
var padLength = length - input.length;
for (var i = 0; i < padLength; i++) {
result = padding + result;
}
return result;
}
function escape(ch) {
var charCode = ch.charCodeAt(0);
var escapeChar;
var length;
if (charCode <= 0xFF) {
escapeChar = 'x';
length = 2;
} else {
escapeChar = 'u';
length = 4;
}
return '\\' + escapeChar + padLeft(charCode.toString(16).toUpperCase(), '0', length);
}
function matchFailed(failure) {
if (pos < rightmostFailuresPos) {
return;
}
if (pos > rightmostFailuresPos) {
rightmostFailuresPos = pos;
rightmostFailuresExpected = [];
}
rightmostFailuresExpected.push(failure);
}
function parse_additive() {
var result0, result1, result2, result3, result4;
var pos0, pos1;
pos0 = pos;
pos1 = pos;
result0 = parse_multiplicative();
if (result0 !== null) {
result1 = parse__();
if (result1 !== null) {
if (input.substr(pos, 2) === "OR") {
result2 = "OR";
pos += 2;
} else {
result2 = null;
if (reportFailures === 0) {
matchFailed("\"OR\"");
}
}
if (result2 !== null) {
result3 = parse__();
if (result3 !== null) {
result4 = parse_additive();
if (result4 !== null) {
result0 = [result0, result1, result2, result3, result4];
} else {
result0 = null;
pos = pos1;
}
} else {
result0 = null;
pos = pos1;
}
} else {
result0 = null;
pos = pos1;
}
} else {
result0 = null;
pos = pos1;
}
} else {
result0 = null;
pos = pos1;
}
if (result0 !== null) {
result0 = (function(offset, left, right) { return left + " OR " + right; })(pos0, result0[0], result0[4]);
}
if (result0 === null) {
pos = pos0;
}
if (result0 === null) {
result0 = parse_multiplicative();
}
return result0;
}
function parse_multiplicative() {
var result0, result1, result2, result3, result4;
var pos0, pos1;
pos0 = pos;
pos1 = pos;
result0 = parse_primary();
if (result0 !== null) {
result1 = parse__();
if (result1 !== null) {
if (input.substr(pos, 3) === "AND") {
result2 = "AND";
pos += 3;
} else {
result2 = null;
if (reportFailures === 0) {
matchFailed("\"AND\"");
}
}
if (result2 !== null) {
result3 = parse__();
if (result3 !== null) {
result4 = parse_multiplicative();
if (result4 !== null) {
result0 = [result0, result1, result2, result3, result4];
} else {
result0 = null;
pos = pos1;
}
} else {
result0 = null;
pos = pos1;
}
} else {
result0 = null;
pos = pos1;
}
} else {
result0 = null;
pos = pos1;
}
} else {
result0 = null;
pos = pos1;
}
if (result0 !== null) {
result0 = (function(offset, left, right) { return left + " AND " + right; })(pos0, result0[0], result0[4]);
}
if (result0 === null) {
pos = pos0;
}
if (result0 === null) {
result0 = parse_primary();
}
return result0;
}
function parse_primary() {
var result0, result1, result2, result3, result4, result5, result6;
var pos0, pos1;
result0 = parse_rule();
if (result0 === null) {
pos0 = pos;
pos1 = pos;
result0 = parse__();
if (result0 !== null) {
if (input.charCodeAt(pos) === 40) {
result1 = "(";
pos++;
} else {
result1 = null;
if (reportFailures === 0) {
matchFailed("\"(\"");
}
}
if (result1 !== null) {
result2 = parse__();
if (result2 !== null) {
result3 = parse_additive();
if (result3 !== null) {
result4 = parse__();
if (result4 !== null) {
if (input.charCodeAt(pos) === 41) {
result5 = ")";
pos++;
} else {
result5 = null;
if (reportFailures === 0) {
matchFailed("\")\"");
}
}
if (result5 !== null) {
result6 = parse__();
if (result6 !== null) {
result0 = [result0, result1, result2, result3, result4, result5, result6];
} else {
result0 = null;
pos = pos1;
}
} else {
result0 = null;
pos = pos1;
}
} else {
result0 = null;
pos = pos1;
}
} else {
result0 = null;
pos = pos1;
}
} else {
result0 = null;
pos = pos1;
}
} else {
result0 = null;
pos = pos1;
}
} else {
result0 = null;
pos = pos1;
}
if (result0 !== null) {
result0 = (function(offset, additive) { return "( " + additive + " )"; })(pos0, result0[3]);
}
if (result0 === null) {
pos = pos0;
}
}
return result0;
}
function parse_rule() {
var result0, result1, result2, result3, result4;
var pos0, pos1;
pos0 = pos;
pos1 = pos;
result0 = parse_symbol();
if (result0 !== null) {
result1 = parse__();
if (result1 !== null) {
result2 = parse_relation();
if (result2 !== null) {
result3 = parse__();
if (result3 !== null) {
result4 = parse_integer();
if (result4 !== null) {
result0 = [result0, result1, result2, result3, result4];
} else {
result0 = null;
pos = pos1;
}
} else {
result0 = null;
pos = pos1;
}
} else {
result0 = null;
pos = pos1;
}
} else {
result0 = null;
pos = pos1;
}
} else {
result0 = null;
pos = pos1;
}
if (result0 !== null) {
result0 = (function(offset, left, relation, right) { return left + " " + relation + " " + right; })(pos0, result0[0], result0[2], result0[4]);
}
if (result0 === null) {
pos = pos0;
}
return result0;
}
function parse_relation() {
var result0, result1;
var pos0, pos1;
pos0 = pos;
pos1 = pos;
if (/^[><]/.test(input.charAt(pos))) {
result0 = input.charAt(pos);
pos++;
} else {
result0 = null;
if (reportFailures === 0) {
matchFailed("[><]");
}
}
if (result0 !== null) {
if (/^[=]/.test(input.charAt(pos))) {
result1 = input.charAt(pos);
pos++;
} else {
result1 = null;
if (reportFailures === 0) {
matchFailed("[=]");
}
}
result1 = result1 !== null ? result1 : "";
if (result1 !== null) {
result0 = [result0, result1];
} else {
result0 = null;
pos = pos1;
}
} else {
result0 = null;
pos = pos1;
}
if (result0 !== null) {
result0 = (function(offset, op, fix) {return op+fix;})(pos0, result0[0], result0[1]);
}
if (result0 === null) {
pos = pos0;
}
if (result0 === null) {
if (input.substr(pos, 2) === "==") {
result0 = "==";
pos += 2;
} else {
result0 = null;
if (reportFailures === 0) {
matchFailed("\"==\"");
}
}
}
return result0;
}
function parse_integer() {
var result0, result1, result2;
var pos0, pos1;
reportFailures++;
pos0 = pos;
pos1 = pos;
result0 = parse__();
if (result0 !== null) {
if (/^[0-9]/.test(input.charAt(pos))) {
result2 = input.charAt(pos);
pos++;
} else {
result2 = null;
if (reportFailures === 0) {
matchFailed("[0-9]");
}
}
if (result2 !== null) {
result1 = [];
while (result2 !== null) {
result1.push(result2);
if (/^[0-9]/.test(input.charAt(pos))) {
result2 = input.charAt(pos);
pos++;
} else {
result2 = null;
if (reportFailures === 0) {
matchFailed("[0-9]");
}
}
}
} else {
result1 = null;
}
if (result1 !== null) {
result2 = parse__();
if (result2 !== null) {
result0 = [result0, result1, result2];
} else {
result0 = null;
pos = pos1;
}
} else {
result0 = null;
pos = pos1;
}
} else {
result0 = null;
pos = pos1;
}
if (result0 !== null) {
result0 = (function(offset, digits) { return parseInt(digits.join(""), 10); })(pos0, result0[1]);
}
if (result0 === null) {
pos = pos0;
}
reportFailures--;
if (reportFailures === 0 && result0 === null) {
matchFailed("integer");
}
return result0;
}
function parse_symbol() {
var result0, result1, result2;
var pos0, pos1;
pos0 = pos;
pos1 = pos;
result0 = parse__();
if (result0 !== null) {
if (/^[a-zA-z]/.test(input.charAt(pos))) {
result2 = input.charAt(pos);
pos++;
} else {
result2 = null;
if (reportFailures === 0) {
matchFailed("[a-zA-z]");
}
}
if (result2 !== null) {
result1 = [];
while (result2 !== null) {
result1.push(result2);
if (/^[a-zA-z]/.test(input.charAt(pos))) {
result2 = input.charAt(pos);
pos++;
} else {
result2 = null;
if (reportFailures === 0) {
matchFailed("[a-zA-z]");
}
}
}
} else {
result1 = null;
}
if (result1 !== null) {
result2 = parse__();
if (result2 !== null) {
result0 = [result0, result1, result2];
} else {
result0 = null;
pos = pos1;
}
} else {
result0 = null;
pos = pos1;
}
} else {
result0 = null;
pos = pos1;
}
if (result0 !== null) {
result0 = (function(offset, alphas) { return alphas.join(""); })(pos0, result0[1]);
}
if (result0 === null) {
pos = pos0;
}
return result0;
}
function parse__() {
var result0, result1;
reportFailures++;
result0 = [];
if (/^[ \t\r\n]/.test(input.charAt(pos))) {
result1 = input.charAt(pos);
pos++;
} else {
result1 = null;
if (reportFailures === 0) {
matchFailed("[ \\t\\r\\n]");
}
}
while (result1 !== null) {
result0.push(result1);
if (/^[ \t\r\n]/.test(input.charAt(pos))) {
result1 = input.charAt(pos);
pos++;
} else {
result1 = null;
if (reportFailures === 0) {
matchFailed("[ \\t\\r\\n]");
}
}
}
reportFailures--;
if (reportFailures === 0 && result0 === null) {
matchFailed("whitespace");
}
return result0;
}
function cleanupExpected(expected) {
expected.sort();
var lastExpected = null;
var cleanExpected = [];
for (var i = 0; i < expected.length; i++) {
if (expected[i] !== lastExpected) {
cleanExpected.push(expected[i]);
lastExpected = expected[i];
}
}
return cleanExpected;
}
function computeErrorPosition() {
/*
* The first idea was to use |String.split| to break the input up to the
* error position along newlines and derive the line and column from
* there. However IE's |split| implementation is so broken that it was
* enough to prevent it.
*/
var line = 1;
var column = 1;
var seenCR = false;
for (var i = 0; i < Math.max(pos, rightmostFailuresPos); i++) {
var ch = input.charAt(i);
if (ch === "\n") {
if (!seenCR) { line++; }
column = 1;
seenCR = false;
} else if (ch === "\r" || ch === "\u2028" || ch === "\u2029") {
line++;
column = 1;
seenCR = true;
} else {
column++;
seenCR = false;
}
}
return { line: line, column: column };
}
var result = parseFunctions[startRule]();
/*
* The parser is now in one of the following three states:
*
* 1. The parser successfully parsed the whole input.
*
* - |result !== null|
* - |pos === input.length|
* - |rightmostFailuresExpected| may or may not contain something
*
* 2. The parser successfully parsed only a part of the input.
*
* - |result !== null|
* - |pos < input.length|
* - |rightmostFailuresExpected| may or may not contain something
*
* 3. The parser did not successfully parse any part of the input.
*
* - |result === null|
* - |pos === 0|
* - |rightmostFailuresExpected| contains at least one failure
*
* All code following this comment (including called functions) must
* handle these states.
*/
if (result === null || pos !== input.length) {
var offset = Math.max(pos, rightmostFailuresPos);
var found = offset < input.length ? input.charAt(offset) : null;
var errorPosition = computeErrorPosition();
throw new this.SyntaxError(
cleanupExpected(rightmostFailuresExpected),
found,
offset,
errorPosition.line,
errorPosition.column
);
}
return result;
},
/* Returns the parser source code. */
toSource: function() { return this._source; }
};
/* Thrown when a parser encounters a syntax error. */
result.SyntaxError = function(expected, found, offset, line, column) {
function buildMessage(expected, found) {
var expectedHumanized, foundHumanized;
switch (expected.length) {
case 0:
expectedHumanized = "end of input";
break;
case 1:
expectedHumanized = expected[0];
break;
default:
expectedHumanized = expected.slice(0, expected.length - 1).join(", ")
+ " or "
+ expected[expected.length - 1];
}
foundHumanized = found ? quote(found) : "end of input";
return "Expected " + expectedHumanized + " but " + foundHumanized + " found.";
}
this.name = "SyntaxError";
this.expected = expected;
this.found = found;
this.message = buildMessage(expected, found);
this.offset = offset;
this.line = line;
this.column = column;
};
result.SyntaxError.prototype = Error.prototype;
return result;
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment