Created
April 12, 2019 16:16
-
-
Save PanJarda/8cf0502165e990cf6da4309ed893a429 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"use strict" | |
/**
 * Lexical grammar table walked by GrammarWalker.
 *
 * Each key names a symbol (a character, a character sequence or a
 * nonterminal). The value is one of:
 *   - a string: the name of the symbol this one produces;
 *   - an array of strings: several such names;
 *   - an object: a nested table keyed by the symbol expected next.
 * Nested tables may carry the special keys "_end" and "_not", which
 * GrammarWalker.prototype.gen looks up by name.
 *
 * NOTE(review): the keys "*", "0", "StringLiteral", "NullLiteral",
 * "BooleanLiteral" and "HexIntegerLiteral" each appear twice at the top
 * level. In an object literal the last definition wins, so the earlier
 * entries are dead at runtime. They are kept unchanged here because merging
 * them needs a decision about the intended grammar.
 * NOTE(review): "NumericalLiteral" is presumably meant to be
 * "NumericLiteral" (the spelling used further down); renaming it would add
 * one more duplicate key, so it is left alone.
 */
var LEXICAL_GRAMMAR = {
  "SingleLineComment": "Comment",
  "MultiLineComment": "Comment",
  "/*": {
    "MultiLineCommentChars": {
      "*/": "MultiLineComment"
    },
    "*/": "MultiLineComment"
  },
  "MultiLineNotAsteriskChar": {
    "MultiLineCommentChars": "MultiLineCommentChars",
  },
  // Shadowed by the later "*": "Punctuator" entry.
  "*": {
    "_end": ["MultiLineCommentChars", "PostAsteriskCommentChars"],
    "PostAsteriskCommentChars": ["MultiLineCommentChars", "PostAsteriskCommentChars"]
  },
  "MultiLineNotForwardSlashOrAsteriskChar": {
    "_end": "PostAsteriskCommentChars",
    "MultiLineCommentChars": "PostAsteriskCommentChars",
  },
  "SourceCharacter": {
    "_not": {
      "*": ["MultiLineNotAsteriskChar", "MultiLineNotForwardSlashOrAsteriskChar"],
      "/": "MultiLineNotForwardSlashOrAsteriskChar",
      "LineTerminator": "SingleLineCommentChar"
    }
  },
  "//": {
    "_end": "SingleLineComment",
    "SingleLineCommentChars": "SingleLineComment"
  },
  "SingleLineCommentChar": {
    "_end": "SingleLineCommentChars",
    "SingleLineCommentChars": "SingleLineCommentChars"
  },
  // Shadowed by the later "StringLiteral": "Literal" entry.
  "StringLiteral": "Token",
  "Punctuator": "Token",
  "NumericalLiteral": "Token",
  "Identifier": "Token",
  "IdentifierName": {
    "_not": {
      "ReservedWord": "Identifier"
    },
    "IdentifierPart": "IdentifierName"
  },
  "IdentifierStart": ["IdentifierName", "IdentifierPart"],
  "UnicodeLetter": "IdentifierStart",
  "$": "IdentifierStart",
  "_": "IdentifierStart",
  "\\": {
    "UnicodeEscapeSequence": "IdentifierStart"
  },
  "UnicodeCombiningMark": "IdentifierPart",
  "UnicodeDigit": "IdentifierPart",
  "UnicodeConnectorPunctuation": "IdentifierPart",
  "<ZWNJ>": "IdentifierPart",
  "<ZWJ>": "IdentifierPart",
  "_Ll": "UnicodeLetter",
  "_Lu": "UnicodeLetter",
  "_Lt": "UnicodeLetter",
  "_Lm": "UnicodeLetter",
  "_Lo": "UnicodeLetter",
  "_Nl": "UnicodeLetter",
  "_Mn": "UnicodeCombiningMark",
  "_Mc": "UnicodeCombiningMark",
  "_Nd": "UnicodeDigit",
  "_Pc": "UnicodeConnectorPunctuation",
  "Keyword": "ReservedWord",
  "FutureReservedWord": "ReservedWord",
  // Shadowed by the later "NullLiteral" / "BooleanLiteral": "Literal" entries.
  "NullLiteral": "ReservedWord",
  "BooleanLiteral": "ReservedWord",
  "break": "Keyword",
  "case": "Keyword",
  "catch": "Keyword",
  "continue": "Keyword",
  "debugger": "Keyword",
  "default": "Keyword",
  "delete": "Keyword",
  "do": "Keyword",
  "else": "Keyword",
  "finally": "Keyword",
  "for": "Keyword",
  "function": "Keyword",
  "if": "Keyword",
  "in": "Keyword",
  "instanceof": "Keyword",
  "new": "Keyword",
  "return": "Keyword",
  "switch": "Keyword",
  "this": "Keyword",
  "throw": "Keyword",
  "try": "Keyword",
  "typeof": "Keyword",
  "var": "Keyword",
  "void": "Keyword",
  "while": "Keyword",
  "with": "Keyword",
  "class": "FutureReservedWord",
  "const": "FutureReservedWord",
  "enum": "FutureReservedWord",
  "export": "FutureReservedWord",
  "extends": "FutureReservedWord",
  "import": "FutureReservedWord",
  "super": "FutureReservedWord",
  "{": "Punctuator",
  "}": "Punctuator",
  "(": "Punctuator",
  ")": "Punctuator",
  "[": "Punctuator",
  "]": "Punctuator",
  ".": {
    "_end": "Punctuator",
    "DecimalDigits": {
      "_end": "DecimalLiteral",
      "ExponentPart": "DecimalLiteral"
    }
  },
  ";": "Punctuator",
  ",": "Punctuator",
  "<": "Punctuator",
  ">": "Punctuator",
  "<=": "Punctuator",
  ">=": "Punctuator",
  "==": "Punctuator",
  "!=": "Punctuator",
  "===": "Punctuator",
  "!==": "Punctuator",
  "+": {
    "_end": "Punctuator",
    "DecimalDigits": "SignedInteger",
  },
  "-": {
    "_end": "Punctuator",
    "DecimalDigits": "SignedInteger",
  },
  // Effective definition of "*": replaces the comment-state table above.
  "*": "Punctuator",
  "%": "Punctuator",
  "++": "Punctuator",
  "--": "Punctuator",
  "<<": "Punctuator",
  ">>": "Punctuator",
  ">>>": "Punctuator",
  "&": "Punctuator",
  "|": "Punctuator",
  "^": "Punctuator",
  "!": "Punctuator",
  "~": "Punctuator",
  "&&": "Punctuator",
  "||": "Punctuator",
  "?": "Punctuator",
  ":": "Punctuator",
  "=": "Punctuator",
  "+=": "Punctuator",
  "-=": "Punctuator",
  "*=": "Punctuator",
  "%=": "Punctuator",
  "<<=": "Punctuator",
  ">>=": "Punctuator",
  ">>>=": "Punctuator",
  "&=": "Punctuator",
  "|=": "Punctuator",
  "^=": "Punctuator",
  "/": {
    "_end": "DivPunctuator",
    "=": "DivPunctuator"
  },
  "NullLiteral": "Literal",
  "BooleanLiteral": "Literal",
  "NumericLiteral": "Literal",
  "StringLiteral": "Literal",
  "RegularExpressionLiteral": "Literal",
  "null": "NullLiteral",
  "true": "BooleanLiteral",
  "false": "BooleanLiteral",
  "DecimalLiteral": "NumericLiteral",
  // Shadowed by the later "HexIntegerLiteral" table.
  "HexIntegerLiteral": "NumericLiteral",
  "DecimalIntegerLiteral": {
    "_end": "DecimalLiteral",
    "ExponentPart": "DecimalLiteral",
    ".": {
      "_end": "DecimalLiteral",
      "DecimalDigits": {
        "_end": "DecimalLiteral",
        "ExponentPart": "DecimalLiteral"
      }
    }
  },
  // Shadowed by the later "0" table (the hex-prefix entry).
  "0": ["DecimalIntegerLiteral", "DecimalDigit", "HexDigit"],
  "NonZeroDigit": {
    "_end": "DecimalIntegerLiteral",
    "DecimalDigits": "DecimalIntegerLiteral"
  },
  "1": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
  "2": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
  "3": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
  "4": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
  "5": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
  "6": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
  "7": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
  "8": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
  "9": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
  "ExponentIndicator": {
    "SignedInteger": "ExponentPart",
  },
  "e": ["ExponentIndicator", "HexDigit"],
  "E": ["ExponentIndicator", "HexDigit"],
  "DecimalDigits": "SignedInteger",
  // Effective definition of "0": only the lowercase hex prefix is nested here.
  "0": {
    "x": {
      "HexDigit": "HexIntegerLiteral",
    }
  },
  "0X": {
    "HexDigit": "HexIntegerLiteral",
  },
  "HexIntegerLiteral": {
    "HexDigit": "HexIntegerLiteral"
  },
  "a": "HexDigit",
  "b": "HexDigit",
  "c": "HexDigit",
  "d": "HexDigit",
  "f": "HexDigit",
  "A": "HexDigit",
  "B": "HexDigit",
  "C": "HexDigit",
  "D": "HexDigit",
  "F": "HexDigit",
};
/*
TODO: separate out the numeric string grammar
*/
/**
 * Stateful cursor over a nested grammar table.
 *
 * @constructor
 * @param {Object} grammar Root grammar table; it is also the initial
 *     "current rule".
 */
function GrammarWalker(grammar) {
  // The root table and the current-rule cursor both start at `grammar`.
  this._grammar = this._rule = grammar;
  // Names recorded while walking; this class never clears the list.
  this._resPath = [];
  // True only while a "_not" table is being resolved.
  this._not = false;
}

/**
 * Feeds one symbol to the walker and advances its state.
 *
 * The symbol is looked up in the current rule:
 *   - a string value is recorded in the path and fed back in, starting
 *     again from the root table;
 *   - any other value becomes the current rule, the symbol is recorded in
 *     the path and `false` is returned;
 *   - if the symbol is missing while a nested rule is current, the cursor
 *     returns to the root, the symbol is retried there and `false` is
 *     returned;
 *   - if the symbol is missing at the root, the "_end" and then "_not" keys
 *     of the root are consulted; when neither exists the recorded path is
 *     returned.
 *
 * @param {string} symbol Symbol (character or nonterminal name) to consume.
 * @return {boolean|Array|{path: Array}} `false` when more input is needed;
 *     for a "_not" table, the productions of every key other than `symbol`
 *     (or `true` when none remain); otherwise an object holding a copy of
 *     the path recorded so far.
 */
GrammarWalker.prototype.gen = function(symbol) {
  if (this._not) {
    var table = this._rule;
    // Collect the productions of every key except `symbol`. The keys must be
    // used by name: indexing the table with array positions yields undefined.
    var productions = Object.keys(table)
      .filter(function(key) { return key !== symbol; })
      .map(function(key) { return table[key]; });
    // Leave negation mode, otherwise every later call would land here again.
    this._not = false;
    this._rule = this._grammar;
    return productions.length > 0 ? productions : true;
  }

  if (symbol in this._rule) {
    var target = this._rule[symbol];
    if (typeof target === "string") {
      // The symbol reduces to another name: record it and continue from the
      // root table with that name.
      this._resPath.push(target);
      this._rule = this._grammar;
      var reduced = this.gen(target);
      this._rule = this._grammar;
      return reduced;
    }
    // prepare for next symbol
    // NOTE(review): array values also take this path (typeof is "object"),
    // so their numeric indices act as keys on the next call - confirm intent.
    this._rule = target;
    this._resPath.push(symbol);
    return false;
  }

  if (this._rule !== this._grammar) {
    // The nested rule does not accept this symbol: restart from the root.
    // NOTE(review): the result of the retry is discarded and the cursor is
    // reset afterwards, as in the original code - confirm this is intended.
    this._rule = this._grammar;
    this.gen(symbol);
    this._rule = this._grammar;
    return false;
  }

  if ("_end" in this._rule) {
    var endSymbol = this._rule["_end"];
    this._resPath.push(endSymbol);
    this._rule = this._grammar;
    return this.gen(endSymbol);
  }

  if ("_not" in this._rule) {
    this._not = !this._not;
    this._rule = this._rule["_not"];
    return this.gen(symbol);
  }

  // Hand out a snapshot so later pushes do not alter an emitted result.
  return { path: this._resPath.slice() };
};
/**
 * Feeds a source text to a GrammarWalker one character at a time.
 *
 * @constructor
 * @param {string} source Text to scan; it is read with `charAt`, so a string
 *     is expected.
 */
function Lexer(source) {
  this._source = source;
  // Index of the next character to hand to the walker.
  this._position = 0;
  this._gw = new GrammarWalker(LEXICAL_GRAMMAR);
  // Every truthy result returned by the walker, in input order.
  this._tokens = [];
}

/**
 * Consumes the next character of the source.
 *
 * @return {boolean} `false` once the end of the source is reached, `true`
 *     after a character has been passed to the walker.
 */
Lexer.prototype.next = function() {
  // charAt yields "" for any position past the end of the string.
  var ch = this._source.charAt(this._position);
  if (ch === "") {
    return false;
  }

  var res = this._gw.gen(ch);
  // The walker returns false while it still needs more input.
  if (res) {
    this._tokens.push(res);
  }
  this._position++;
  return true;
};
// Demo run: scan a hexadecimal literal and print whatever the walker emitted.
var lexer = new Lexer("0xab");
// next() returns false once the whole source has been consumed.
while (lexer.next()) {}
console.log(lexer._tokens);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment