Skip to content

Instantly share code, notes, and snippets.

@PanJarda
Created April 12, 2019 16:16
Show Gist options
  • Save PanJarda/8cf0502165e990cf6da4309ed893a429 to your computer and use it in GitHub Desktop.
Save PanJarda/8cf0502165e990cf6da4309ed893a429 to your computer and use it in GitHub Desktop.
"use strict"
/*
 * Lexical grammar table walked by GrammarWalker.gen.
 *
 * How gen reads an entry:
 *   - key: the symbol being looked up (an input character, or the name of a
 *     production produced by an earlier step);
 *   - string value: the symbol is replaced by that production name and the
 *     lookup restarts from the top of this table;
 *   - any non-string value: becomes the table in which the NEXT symbol is
 *     looked up (gen has no dedicated branch for array values, so arrays take
 *     this path as well);
 *   - "_end": read when the current symbol is not a key of the table;
 *   - "_not": switches the walker into its negated mode.
 *
 * Production names appear to follow the ECMAScript 5.1 lexical grammar
 * (Comment, Token, Punctuator, NumericLiteral, ...) - TODO confirm.
 *
 * NOTE(review): several keys occur more than once in this literal. In an
 * object literal the LAST occurrence silently replaces the earlier ones; the
 * affected entries are marked below.
 */
var LEXICAL_GRAMMAR = {
// --- Comments ---
"SingleLineComment": "Comment",
"MultiLineComment": "Comment",
"/*": {
// NOTE(review): spelled "MultilineCommentChars" (lowercase "l") here but
// "MultiLineCommentChars" everywhere else in this table - confirm which is intended.
"MultilineCommentChars": {
"*/": "MultiLineComment"
},
"*/": "MultiLineComment"
},
"MultiLineNotAsteriskChar": {
"MultiLineCommentChars": "MultiLineCommentChars",
},
// NOTE(review): "*" is defined again in the punctuator section below; that later
// string entry replaces this object.
"*": {
"_end": ["MultiLineCommentChars", "PostAsteriskCommentChars"],
// NOTE(review): key reads "PostAsterisCommentChars" (no "k") while the values use
// "PostAsteriskCommentChars" - likely a typo, confirm.
"PostAsterisCommentChars": ["MultiLineCommentChars", "PostAsteriskCommentChars"]
},
"MultiLineNotForwardSlashOrAsteriskChar": {
"_end": "PostAsteriskCommentChars",
"MultiLineCommentChars": "PostAsteriskCommentChars",
},
"SourceCharacter": {
"_not": {
"*": ["MultiLineNotAsteriskChar", "MultiLineNotForwardSlashOrAsteriskChar"],
"/": "MultiLineNotForwardSlashOrAsteriskChar",
"LineTerminator": "SingleLineCommentChar"
}
},
"//": {
"_end": "SingleLineComment",
"SingleLineCommentChars": "SingleLineComment"
},
"SingleLineCommentChar": {
"_end": "SingleLineCommentChars",
"SingleLineCommentChars": "SingleLineCommentChars"
},
// --- Tokens ---
// NOTE(review): "StringLiteral" is defined again in the literal section below
// (as "Literal"); the later entry wins.
"StringLiteral": "Token",
"Punctuator": "Token",
// NOTE(review): "NumericalLiteral" here vs. "NumericLiteral" used further down -
// confirm which name is intended.
"NumericalLiteral": "Token",
"Identifier": "Token",
// --- Identifiers ---
"IdentifierName": {
"_not": {
"ReservedWord": "Identifier"
},
"IdentifierPart": "IdentifierName"
},
"IdentifierStart": ["IdentifierName","IdentifierPart"],
"UnicodeLetter": "IdentifierStart",
"$": "IdentifierStart",
"_": "IdentifierStart",
"\\": {
"UnicodeEscapeSequence": "IdentifierStart"
},
"UnicodeCombiningMark": "IdentifierPart",
"UnicodeDigit": "IdentifierPart",
"UnicodeConnectorPunctuation": "IdentifierPart",
"<ZWNJ>": "IdentifierPart",
"<ZWJ>": "IdentifierPart",
// Keys prefixed with "_" followed by two letters presumably stand for Unicode
// general categories (Ll, Lu, ...) - TODO confirm how they are meant to be matched.
"_Ll": "UnicodeLetter",
"_Lu": "UnicodeLetter",
"_Lt": "UnicodeLetter",
"_Lm": "UnicodeLetter",
"_Lo": "UnicodeLetter",
"_Nl": "UnicodeLetter",
"_Mn": "UnicodeCombiningMark",
"_Mc": "UnicodeCombiningMark",
"_Nd": "UnicodeDigit",
"_Pc": "UnicodeConnectorPunctuation",
// --- Reserved words ---
"Keyword": "ReservedWord",
"FutureReservedWord": "ReservedWord",
// NOTE(review): "NullLiteral" and "BooleanLiteral" are defined again in the
// literal section below (as "Literal"); the later entries win.
"NullLiteral": "ReservedWord",
"BooleanLiteral": "ReservedWord",
"break": "Keyword",
"case": "Keyword",
"catch": "Keyword",
"continue": "Keyword",
"debugger": "Keyword",
"default": "Keyword",
"delete": "Keyword",
"do": "Keyword",
"else": "Keyword",
"finally": "Keyword",
"for": "Keyword",
"function": "Keyword",
"if": "Keyword",
"in": "Keyword",
"instanceof": "Keyword",
"new": "Keyword",
"return": "Keyword",
"switch": "Keyword",
"this": "Keyword",
"throw": "Keyword",
"try": "Keyword",
"typeof": "Keyword",
"var": "Keyword",
"void": "Keyword",
"while": "Keyword",
"with": "Keyword",
"class": "FutureReservedWord",
"const": "FutureReservedWord",
"enum": "FutureReservedWord",
"export": "FutureReservedWord",
"extends": "FutureReservedWord",
"import": "FutureReservedWord",
"super": "FutureReservedWord",
// --- Punctuators ---
"{": "Punctuator",
"}": "Punctuator",
"(": "Punctuator",
")": "Punctuator",
"[": "Punctuator",
"]": "Punctuator",
// "." is either a punctuator on its own or the start of a decimal literal.
".": {
"_end": "Punctuator",
"DecimalDigits": {
"_end": "DecimalLiteral",
"ExponentPart": "DecimalLiteral"
}
},
";": "Punctuator",
",": "Punctuator",
"<": "Punctuator",
">": "Punctuator",
"<=": "Punctuator",
">=": "Punctuator",
"==": "Punctuator",
"!=": "Punctuator",
"===": "Punctuator",
"!==": "Punctuator",
"+": {
"_end": "Punctuator",
"DecimalDigits": "SignedInteger",
},
"-": {
"_end": "Punctuator",
"DecimalDigits": "SignedInteger",
},
// NOTE(review): second definition of "*"; this one replaces the comment-related
// object defined near the top of the table.
"*": "Punctuator",
"%": "Punctuator",
"++": "Punctuator",
"--": "Punctuator",
"<<": "Punctuator",
">>": "Punctuator",
">>>": "Punctuator",
"&": "Punctuator",
"|": "Punctuator",
"^": "Punctuator",
"!": "Punctuator",
"~": "Punctuator",
"&&": "Punctuator",
"||": "Punctuator",
"?": "Punctuator",
":": "Punctuator",
"=": "Punctuator",
"+=": "Punctuator",
"-=": "Punctuator",
"*=": "Punctuator",
"%=": "Punctuator",
"<<=": "Punctuator",
">>=": "Punctuator",
">>>=": "Punctuator",
"&=": "Punctuator",
"|=": "Punctuator",
"^=": "Punctuator",
"/": {
"_end": "DivPunctuator",
"=": "DivPunctuator"
},
// --- Literals ---
// These four entries replace the earlier "NullLiteral", "BooleanLiteral" and
// "StringLiteral" definitions above (duplicate keys, last one wins).
"NullLiteral": "Literal",
"BooleanLiteral": "Literal",
"NumericLiteral": "Literal",
"StringLiteral": "Literal",
"RegularExpressionLiteral": "Literal",
"null": "NullLiteral",
"true": "BooleanLiteral",
"false": "BooleanLiteral",
// --- Numeric literals ---
"DecimalLiteral": "NumericLiteral",
// NOTE(review): "HexIntegerLiteral" is defined again below as an object; that
// later entry replaces this string.
"HexIntegerLiteral": "NumericLiteral",
"DecimalIntegerLiteral": {
"_end": "DecimalLiteral",
"ExponentPart": "DecimalLiteral",
".": {
"_end": "DecimalLiteral",
"DecimalDigits": {
"_end": "DecimalLiteral",
"ExponentPart": "DecimalLiteral"
}
}
},
// NOTE(review): "0" is defined again below as the hex-prefix object; that later
// entry replaces this array.
"0": ["DecimalIntegerLiteral", "DecimalDigit", "HexDigit"],
"NonZeroDigit": {
"_end": "DecimalIntegerLiteral",
"DecimalDigits": "DecimalIntegerLiteral"
},
"1": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
"2": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
"3": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
"4": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
"5": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
"6": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
"7": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
"8": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
"9": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
"ExponentIndicator": {
"SignedInteger": "ExponentPart",
},
"e": ["ExponentIndicator", "HexDigit"],
"E": ["ExponentIndicator", "HexDigit"],
"DecimalDigits": "SignedInteger",
// Second definition of "0" (hex prefix "0x"); this is the one in effect.
"0": {
"x": {
"HexDigit": "HexIntegerLiteral",
}
},
// NOTE(review): two-character key, whereas the "0x" prefix above is modelled as
// nested single characters; Lexer.next feeds one character at a time - confirm
// how this entry is meant to be reached.
"0X": {
"HexDigit": "HexIntegerLiteral",
},
// Second definition of "HexIntegerLiteral"; this is the one in effect.
"HexIntegerLiteral": {
"HexDigit": "HexIntegerLiteral"
},
"a": "HexDigit",
"b": "HexDigit",
"c": "HexDigit",
"d": "HexDigit",
"f": "HexDigit",
"A": "HexDigit",
"B": "HexDigit",
"C": "HexDigit",
"D": "HexDigit",
"F": "HexDigit",
};
/*
TODO: separate out the numeric string grammar
*/
/**
 * Stateful walker over a nested grammar table.
 *
 * The walker keeps a cursor (`_rule`) into the table, the list of symbols it
 * has recorded so far (`_resPath`) and a flag (`_not`) that switches `gen`
 * into its negated mode.
 *
 * @constructor
 * @param {Object} grammar Root grammar table. String values name the
 *     production a symbol reduces to; other values are nested tables for the
 *     following symbol. The keys "_end" and "_not" are reserved.
 */
function GrammarWalker(grammar) {
  this._grammar = this._rule = grammar;
  this._resPath = [];
  this._not = false;
}

/**
 * Feeds one symbol to the walker and advances its internal state.
 *
 * @param {string} symbol Input character or production name to look up in
 *     the current table.
 * @returns {(boolean|Array|{path: Array})}
 *     - `false` when the symbol only moved the cursor (more input needed), or
 *       when the lookup had to be retried from the root table;
 *     - in negated mode: the values of every alternative of the current table
 *       except `symbol`, or `true` when no alternative is left;
 *     - `{ path: [...] }` (the shared, ever-growing `_resPath` array) when no
 *       rule, "_end" or "_not" entry applies at the root table.
 */
GrammarWalker.prototype.gen = function(symbol) {
  var res;
  var newSymbol;

  if (this._not) {
    // Negated mode: collect the values of all alternatives other than `symbol`.
    // The key names themselves must be used for the lookup; iterating the key
    // array with for...in would yield array indices ("0", "1", ...) instead.
    var rule = this._rule;
    res = Object.keys(rule)
      .filter(function(key) {
        return key !== symbol;
      })
      .map(function(key) {
        return rule[key];
      });
    // NOTE(review): `_not` is never switched back off and `_rule` stays on the
    // "_not" table, so every later call also ends up here - confirm intent.
    return res.length > 0 ? res : true;
  }

  if (symbol in this._rule) {
    if (typeof this._rule[symbol] === "string") {
      // Reduction: replace the symbol by the production it maps to and look
      // that production up from the root table.
      newSymbol = this._rule[symbol];
      this._resPath.push(newSymbol);
      this._rule = this._grammar;
      res = this.gen(newSymbol);
      this._rule = this._grammar;
      return res;
    }
    // Nested table: remember it as the context for the next symbol.
    this._rule = this._rule[symbol];
    this._resPath.push(symbol);
    return false;
  } else if (this._rule !== this._grammar) {
    // The symbol does not continue the current nested rule: retry it from the
    // root table.
    // NOTE(review): the result of the retry is intentionally left discarded to
    // preserve existing behaviour - confirm whether it should be returned.
    this._rule = this._grammar;
    this.gen(symbol);
    this._rule = this._grammar;
    return false;
  }

  // From here on the cursor is at the root table and `symbol` is not a key.
  if ("_end" in this._rule) {
    newSymbol = this._rule["_end"];
    this._resPath.push(newSymbol);
    this._rule = this._grammar;
    return this.gen(newSymbol);
  }
  if ("_not" in this._rule) {
    this._not = !this._not;
    this._rule = this._rule["_not"];
    return this.gen(symbol);
  }
  return { path: this._resPath };
};
/**
 * Character-by-character lexer driven by a GrammarWalker over LEXICAL_GRAMMAR.
 *
 * @constructor
 * @param {string} source Text to be scanned.
 */
function Lexer(source) {
  this._source = source;
  this._position = 0;
  this._gw = new GrammarWalker(LEXICAL_GRAMMAR);
  this._tokens = [];
}

/**
 * Consumes the character at the current position, hands it to the grammar
 * walker and stores any truthy result in `_tokens`.
 *
 * @returns {boolean} `false` once the end of the source has been reached
 *     (nothing is consumed in that case), `true` otherwise.
 */
Lexer.prototype.next = function() {
  var current = this._source.charAt(this._position);

  // charAt yields "" past the end of the string.
  if (current === "") {
    return false;
  }

  var produced = this._gw.gen(current);
  if (produced) {
    this._tokens.push(produced);
  }

  this._position += 1;
  return true;
};
// Demo driver: scan the sample input "0xab" to exhaustion, then print
// whatever the lexer collected in its token list.
var lexer = new Lexer("0xab");
for (;;) {
  if (!lexer.next()) {
    break;
  }
}
console.log(lexer._tokens);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment