Skip to content

Instantly share code, notes, and snippets.

@loloof64
Last active October 22, 2019 21:51
Show Gist options
  • Save loloof64/3adc28c15daf2f52e5e3de6ee0806186 to your computer and use it in GitHub Desktop.
Save loloof64/3adc28c15daf2f52e5e3de6ee0806186 to your computer and use it in GitHub Desktop.
A simple adaptation of the ECMAScript 5 grammar, made for Chevrotain
"use strict"
const tokenize = require("./constraint_script_lexer").tokenize
const ConstraintScriptParser = require("./constraint_script_parser").ConstraintScriptParser
const parserInstance = new ConstraintScriptParser()
function parse(str) {
const tokens = tokenize(str)
parserInstance.input = tokens
parserInstance.orgText = str
const value = parserInstance.Script()
if (parserInstance.errors.length > 0) {
throw Error(parserInstance.errors)
}
return value;
}
module.exports = {
parse, parserInstance
}
"use strict"
/**
* A template for generating syntax diagrams html file.
* See: https://github.com/SAP/chevrotain/tree/master/diagrams for more details.
*/
const path = require("path");
const fs = require("fs");
const chevrotain = require("chevrotain");
const grammar = require("./grammar/constraint_script_api");
// Render the serialized grammar productions of the shared parser instance
// into the syntax-diagrams HTML text.
const diagramsHtml = chevrotain.createSyntaxDiagramsCode(
  grammar.parserInstance.getSerializedGastProductions()
)
// Write the page into the directory that contains this script.
const targetDir = path.resolve(__dirname, "./")
const targetFile = targetDir + "/constraint_script_diagram.html"
fs.writeFileSync(targetFile, diagramsHtml)
"use strict"
/**
* ECMAScript cannot be easily lexed using a distinct lexing phase.
* See: https://users.soe.ucsc.edu/~cormac/papers/dls14a.pdf
*
* So, to expedite the creation of the chevrotain ECMA5 grammar,
* the Acorn project is used (only) to tokenize the input text.
*
* In the future this should be refactored to avoid the distinct lexing phase based on:
* https://github.com/SAP/chevrotain/blob/master/test/full_flow/ecma_quirks/ecma_quirks.ts
*
*/
const acorn = require("acorn")
const acornTokTypes = acorn.tokTypes
const tokens = require("./constraint_script_tokens")
/**
 * Builds the plain token object handed to the parser from an acorn token.
 *
 * @param {object} chevTokenClass - Token type; only its `tokenTypeIdx` is read.
 * @param {object} acornToken - Acorn token; its `value`, `start` and `end` are read.
 * @returns {{tokenTypeIdx: *, image: *, startOffset: *, endOffset: *}}
 *   `image` is the acorn `value`, the offsets are acorn's `start` and `end` copied as-is.
 */
function createChevToken(chevTokenClass, acornToken) {
  const { tokenTypeIdx } = chevTokenClass
  const { value: image, start: startOffset, end: endOffset } = acornToken
  return { tokenTypeIdx, image, startOffset, endOffset }
}
function tokenize(str) {
const result = []
for (let token of acorn.tokenizer(str, { ecmaVersion: 6 })) {
let acornType = token.type
let ctt
// https://github.com/ternjs/acorn/blob/master/src/tokentype.js#L54
switch (acornType) {
case acornTokTypes._var:
ctt = tokens.VarTok
break
case acornTokTypes.name:
switch (token.value) {
default:
ctt = tokens.Identifier
break
}
break
case acornTokTypes._return:
ctt = tokens.ReturnTok
break
case acornTokTypes.parenL:
ctt = tokens.LParen
break
case acornTokTypes.parenR:
ctt = tokens.RParen
break
case acornTokTypes.semi:
ctt = tokens.Semicolon
break
case acornTokTypes.prefix:
switch (token.value) {
case "!":
ctt = tokens.Exclamation
break
}
break
case acornTokTypes.logicalAND:
ctt = tokens.AmpersandAmpersand
break
case acornTokTypes.logicalOR:
ctt = tokens.VerticalBarVerticalBar
break
case acornTokTypes.question:
ctt = tokens.Question
break
case acornTokTypes.colon:
ctt = tokens.Colon
break
case acornTokTypes.modulo:
ctt = tokens.Percent
break
case acornTokTypes.plusMin:
switch (token.value) {
case "+":
ctt = tokens.Plus
break
case "-":
ctt = tokens.Minus
break
}
break
case acornTokTypes.relational:
switch (token.value) {
case "<":
ctt = tokens.Less
break
case ">":
ctt = tokens.Greater
break
case "<=":
ctt = tokens.LessEq
break
case ">=":
ctt = tokens.GreaterEq
break
}
break
case acornTokTypes.equality:
switch (token.value) {
case "==":
ctt = tokens.EqEq
break
case "!=":
ctt = tokens.NotEq
break
}
break
case acornTokTypes.eq:
ctt = tokens.Eq
break
case acornTokTypes._true:
ctt = tokens.TrueTok
break
case acornTokTypes._false:
ctt = tokens.FalseTok
break
case acornTokTypes.num:
ctt = tokens.NumericLiteral
break
default:
throw Error("sad sad panda")
}
const chevToken = createChevToken(ctt, token)
result.push(chevToken)
}
return result
}
module.exports = {
tokenize
}
"use strict"
const { EmbeddedActionsParser, EOF, tokenMatcher } = require("chevrotain")
const tokens = require("./constraint_script_tokens")
// for conciseness
const t = tokens
// Values for the optional second argument of the parser's CONSUME/CONSUME2
// overrides: only the literal `true` makes them attempt automatic semicolon insertion.
const ENABLE_SEMICOLON_INSERTION = true
const DISABLE_SEMICOLON_INSERTION = false
// Variable store of the script being evaluated (identifier name -> value).
// Written by the VariableDeclaration rule, read by PrimaryExpression and
// replaced with a fresh object in reset(). It lives at module level, so it is
// shared by every parser instance created from this module.
let records = {};
// as defined in https://www.ecma-international.org/ecma-262/5.1/index.html
class ConstraintScriptParser extends EmbeddedActionsParser {
set orgText(newText) {
this._orgText = newText
}
reset() {
super.reset();
records = {};
}
constructor() {
super(tokens, {
// Reduces Parser Initialization time and this grammar does not need
// a larger lookahead.
maxLookahead: 2
})
// Optimization to avoid traversing the prototype chain at hotspots.
this.SUPER_CONSUME = super.CONSUME
this.SUPER_CONSUME2 = super.CONSUME2
this._orgText = ""
// to avoid V8 hidden class changes by dynamic definition
// of properties on "this"
const $ = this
// A.3 Expressions
// Note that the binary expression operators are treated as a flat list
// instead of using a new rule for each precedence level.
// This is both faster and less verbose but it means additional logic must be used to re-order the flat list
// into a precedence tree.
// This approach was used in the swift compiler.
// https://developer.apple.com/library/content/documentation/Swift/Conceptual/Swift_Programming_Language/Expressions.html#//apple_ref/doc/uid/TP40014097-CH32-ID383
// (scroll down to the note on binary expressions)
// Also note that such logic can only be implemented once the parser actually outputs some data structure...
// See 11.1
$.RULE("PrimaryExpression", () => {
let value, id;
$.OR(
[
{ ALT: () => {
id = $.CONSUME(t.Identifier).image;
value = records[id];
}},
{ ALT: () => {
const rawValue = $.CONSUME(t.AbsLiteral).image;
value = parseInt(rawValue);
}},
{ ALT: () => {
value = $.SUBRULE($.ParenthesisExpression);
}}
]
);
return value;
})
$.RULE("ParenthesisExpression", () => {
$.CONSUME(t.LParen)
const value = $.SUBRULE($.Expression);
$.CONSUME(t.RParen)
return value;
})
// See 11.4
$.RULE("UnaryExpression", () => {
let value, op;
$.OR([
{ ALT: () => {
value = $.SUBRULE($.PrimaryExpression);
}},
{
ALT: () => {
$.OR2(
[
{ ALT: () => {
op = $.CONSUME(t.Plus);
}},
{ ALT: () => {
op = $.CONSUME(t.Minus) ;
}},
{ ALT: () => {
op = $.CONSUME(t.Exclamation);
}}
]
)
value = $.SUBRULE($.UnaryExpression);
// Obviously, nothing to in case of t.Plus
if (tokenMatcher(op, t.Minus)) {
value *= -1;
} else if (tokenMatcher(op, t.Exclamation)) {
value = !value;
}
}
}
])
return value;
})
$.RULE("BinaryExpression", () => {
let value, op, rhsValue;
value = $.SUBRULE($.UnaryExpression);
$.MANY(() => {
$.OR(
[
// flat list of binary operators
{ ALT: () => op = $.CONSUME(t.VerticalBarVerticalBar) },
{ ALT: () => op = $.CONSUME(t.AmpersandAmpersand) },
{ ALT: () => op = $.CONSUME(t.AbsEqualityOperator) },
{ ALT: () => op = $.CONSUME(t.AbsRelationalOperator) },
{
ALT: () =>
op = $.CONSUME(t.AbsMultiplicativeOperator)
},
{ ALT: () => op = $.CONSUME(t.AbsAdditiveOperator) }
]
)
rhsValue = $.SUBRULE2($.UnaryExpression)
if (tokenMatcher(op, t.VerticalBarVerticalBar)) {
value = value || rhsValue;
} else if (tokenMatcher(op, t.AmpersandAmpersand)) {
value = value && rhsValue;
} else if (tokenMatcher(op, t.AbsEqualityOperator)) {
if (tokenMatcher(op, t.EqEq)) {
value = value === rhsValue;
}
else if (tokenMatcher(op, t.NotEq)) {
value = vallue !== rhsValue;
}
} else if (tokenMatcher(op, t.AbsRelationalOperator)) {
if (tokenMatcher(op, t.Less)) {
value = value < rhsValue;
} else if (tokenMatcher(op, t.Greater)) {
value = value > rhsValue;
} else if (tokenMatcher(op, t.LessEq)) {
value = value <= rhsValue;
} else if (tokenMatcher(op, t.GreaterEq)) {
value = value >= rhsValue;
}
} else if (tokenMatcher(op, t.AbsMultiplicativeOperator)) {
if (tokenMatcher(op, t.Percent)) {
value = value % rhsValue;
}
} else if (tokenMatcher(op, t.AbsAdditiveOperator)) {
if (tokenMatcher(op, t.Plus)) {
value += rhsValue;
} else if (tokenMatcher(op, t.Minus)) {
value -= rhsValue;
}
}
})
return value;
})
// See 11.13
$.RULE("AssignmentExpression", () => {
let value, ifResult, elseResult;
value = $.SUBRULE($.BinaryExpression)
$.OPTION(() => {
$.CONSUME(t.Question)
ifResult = $.SUBRULE($.AssignmentExpression)
$.CONSUME(t.Colon)
elseResult = $.SUBRULE2($.AssignmentExpression)
value = value ? ifResult : elseResult;
})
return value;
})
// See 11.14
$.RULE("Expression", () => {
return $.SUBRULE($.AssignmentExpression);
})
// A.4 Statements
// See 12.2
$.RULE("VariableStatement", () => {
$.SUBRULE($.VariableDeclarationList);
$.CONSUME(t.Semicolon, DISABLE_SEMICOLON_INSERTION)
})
// See 12.2
$.RULE("VariableDeclarationList", () => {
$.SUBRULE($.VariableDeclaration)
})
// See 12.2
$.RULE("VariableDeclaration", () => {
const id = $.CONSUME(t.Identifier).image;
const value = $.SUBRULE($.Initialiser);
records[id] = value;
})
// See 12.2
$.RULE("Initialiser", () => {
$.CONSUME(t.Eq)
return $.SUBRULE($.AssignmentExpression);
})
// See 12.9
$.RULE("ReturnStatement", () => {
$.CONSUME(t.ReturnTok)
const value = $.SUBRULE($.Expression)
$.CONSUME(t.Semicolon, DISABLE_SEMICOLON_INSERTION)
return value;
})
$.RULE("Script", () => {
$.MANY(() => {
$.SUBRULE($.VariableStatement)
})
return $.SUBRULE($.ReturnStatement);
});
this.performSelfAnalysis()
}
/*
* Link https://www.ecma-international.org/ecma-262/5.1/#sec-7.9.1
* Automatic semicolon insertion implementation.
* The spec defines the insertion in terms of encountering an "offending"
* token and then inserting a semicolon under one of three basic rules.
* 1. Offending token is after a lineTerminator.
* 2. Offending token is a '}' RCurly.
* 3. Reached EOF but failed to parse a complete ECMAScript Program.
*
* In addition there are two overriding conditions on these rules.
* 1. do not insert if the semicolon would then be parsed as an empty statement.
* 2. do not If that semicolon would become one of the two semicolons in the header of a for statement.
*
* The implementation approaches this problem in a slightly different but equivalent approach:
*
* anytime a semicolon should be consumed AND
* the nextToken is not a semicolon AND
* the context is one that allows semicolon insertion (not in a for header or empty Statement) AND
* one of the 3 basic rules match
* ---------------------------------->
* THEN insert a semicolon
*
* Note that the context information is passed as the 'trySemiColonInsertion' argument
* to the CONSUME parsing DSL method
*/
canAndShouldDoSemiColonInsertion() {
const nextToken = this.LA(1)
const isNextTokenSemiColon = tokenMatcher(nextToken, t.Semicolon)
return (
isNextTokenSemiColon === false &&
(this.lineTerminatorHere() || // basic rule 1a and 3
tokenMatcher(nextToken, t.RCurly) || // basic rule 1b
tokenMatcher(nextToken, EOF))
) // basic rule 2
}
// // TODO: performance: semicolon insertion costs 5-10% of runtime, can this be improved?
CONSUME(tokClass, trySemiColonInsertion) {
if (
trySemiColonInsertion === true &&
this.canAndShouldDoSemiColonInsertion()
) {
return insertedSemiColon
}
return this.SUPER_CONSUME(tokClass)
}
CONSUME2(tokClass, trySemiColonInsertion) {
if (
trySemiColonInsertion === true &&
this.canAndShouldDoSemiColonInsertion()
) {
return insertedSemiColon
}
return this.SUPER_CONSUME2(tokClass)
}
// TODO: implement once the parser builds some data structure we can explore.
// in the case of "for (x in y)" form.
// the "IN" is only allowed if x is a left hand side expression
// https://www.ecma-international.org/ecma-262/5.1/index.html#sec-12.6
// so this method must verify that the exp parameter fulfills this condition.
canInComeAfterExp(exp) {
// TODO: temp implemntatoin, will always allow IN style iteration for now.
return true
}
noLineTerminatorHere() {
return !this.lineTerminatorHere()
}
lineTerminatorHere() {
const prevToken = this.LA(0)
const nextToken = this.LA(1)
const seekStart = prevToken.endOffset
const seekEnd = nextToken.startOffset - 1
let i = seekStart
while (i < seekEnd) {
const code = this._orgText.charCodeAt(i)
if (
code === 10 ||
code === 13 ||
code === 0x2028 ||
code === 0x2029
) {
return true
}
i++
}
return false
}
}
const insertedSemiColon = {
tokenTypeIdx: t.Semicolon.tokenTypeIdx,
image: ";",
startOffset: NaN,
endOffset: NaN,
automaticallyInserted: true
}
module.exports = {
ConstraintScriptParser
}
"use strict"
/*
* Spec: https://www.ecma-international.org/ecma-262/5.1/#sec-7
* important notes:
* * The Tokens class hierarchy in this module is based upon, but does not precisely match the spec's hierarchy.
* Instead the hierarchy is meant to provide easy categorization/classification of the tokens for "future phases"
* such as: parsing/syntax highlighting/refactoring
*/
const { createToken } = require("chevrotain")
// Link: https://www.ecma-international.org/ecma-262/5.1/#sec-7.2
const Whitespace = createToken({ name: "Whitespace" })
// Link: https://www.ecma-international.org/ecma-262/5.1/#sec-7.3
const LineTerminator = createToken({
name: "LineTerminator",
categories: Whitespace
})
// Link: https://www.ecma-international.org/ecma-262/5.1/#sec-7.6
const IdentifierName = createToken({ name: "IdentifierName" })
const AbsAnyKeyword = createToken({
name: "AbsAnyKeyword",
categories: IdentifierName
})
const AbsKeyword = createToken({
name: "AbsKeyword",
categories: AbsAnyKeyword
})
const ReturnTok = createToken({ name: "ReturnTok", categories: AbsKeyword })
// An IdentifierName, but not a reservedKeyword
const Identifier = createToken({
name: "Identifier",
categories: IdentifierName
})
// Link: https://www.ecma-international.org/ecma-262/5.1/#sec-7.7
const AbsPunctuator = createToken({ name: "AbsPunctuator" })
const LParen = createToken({ name: "LParen", categories: AbsPunctuator })
const RParen = createToken({ name: "RParen", categories: AbsPunctuator })
const Semicolon = createToken({ name: "Semicolon", categories: AbsPunctuator })
const Exclamation = createToken({
name: "Exclamation",
categories: AbsPunctuator
})
const AmpersandAmpersand = createToken({
name: "AmpersandAmpersand",
categories: AbsPunctuator
})
const VerticalBarVerticalBar = createToken({
name: "VerticalBarVerticalBar",
categories: AbsPunctuator
})
const Question = createToken({ name: "Question", categories: AbsPunctuator })
const Colon = createToken({ name: "Colon", categories: AbsPunctuator })
const AbsMultiplicativeOperator = createToken({
name: "AbsMultiplicativeOperator",
categories: AbsPunctuator
})
const Percent = createToken({
name: "Percent",
categories: AbsMultiplicativeOperator
})
const AbsAdditiveOperator = createToken({
name: "AbsAdditiveOperator",
categories: AbsPunctuator
})
const Plus = createToken({ name: "Plus", categories: AbsAdditiveOperator })
const Minus = createToken({ name: "Minus", categories: AbsAdditiveOperator })
const AbsRelationalOperator = createToken({
name: "AbsRelationalOperator",
categories: AbsPunctuator
})
const Less = createToken({ name: "Less", categories: AbsRelationalOperator })
const Greater = createToken({
name: "Greater",
categories: AbsRelationalOperator
})
const LessEq = createToken({
name: "LessEq",
categories: AbsRelationalOperator
})
const GreaterEq = createToken({
name: "GreaterEq",
categories: AbsRelationalOperator
})
const AbsEqualityOperator = createToken({
name: "AbsEqualityOperator",
categories: AbsPunctuator
})
const EqEq = createToken({ name: "EqEq", categories: AbsEqualityOperator })
const NotEq = createToken({ name: "NotEq", categories: AbsEqualityOperator })
const AbsAssignmentOperator = createToken({
name: "AbsAssignmentOperator",
categories: AbsPunctuator
})
const Eq = createToken({ name: "Eq", categories: AbsAssignmentOperator })
// Link: https://www.ecma-international.org/ecma-262/5.1/#sec-7.8
const AbsLiteral = createToken({ name: "AbsLiteral" })
const AbsBooleanLiteral = createToken({
name: "AbsBooleanLiteral",
categories: AbsLiteral
})
const TrueTok = createToken({
name: "TrueTok",
categories: [AbsBooleanLiteral, AbsKeyword]
})
const FalseTok = createToken({
name: "FalseTok",
categories: [AbsBooleanLiteral, AbsKeyword]
})
const NumericLiteral = createToken({
name: "NumericLiteral",
categories: AbsLiteral
})
module.exports = {
Whitespace,
LineTerminator,
IdentifierName,
AbsAnyKeyword,
AbsKeyword,
ReturnTok,
Identifier,
AbsPunctuator,
LParen,
RParen,
Semicolon,
Exclamation,
AmpersandAmpersand,
VerticalBarVerticalBar,
Question,
Colon,
AbsMultiplicativeOperator,
Percent,
AbsAdditiveOperator,
Plus,
Minus,
AbsRelationalOperator,
Less,
Greater,
LessEq,
GreaterEq,
AbsEqualityOperator,
EqEq,
NotEq,
AbsAssignmentOperator,
Eq,
AbsLiteral,
AbsBooleanLiteral,
TrueTok,
FalseTok,
NumericLiteral,
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment