Created
April 12, 2019 16:16
-
-
Save PanJarda/8cf0502165e990cf6da4309ed893a429 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| "use strict" | |
/**
 * Lexical grammar table, apparently modelled on the ECMAScript 5 lexical
 * grammar, keyed by symbol (a literal character sequence or a nonterminal
 * name).
 *
 * Notation, as consumed by GrammarWalker.gen:
 *   - string value : the symbol reduces to the named symbol;
 *   - object value : a continuation table keyed by the symbol that follows;
 *   - "_end"       : the reduction to use when nothing in the table follows;
 *   - "_not"       : an exclusion table (every entry except the given symbol);
 *   - array value  : presumably several alternative reductions -- NOTE(review):
 *                    GrammarWalker.gen does not special-case arrays yet.
 *
 * Every symbol is listed exactly once. An object literal keeps only the last
 * value of a repeated key, so symbols that play several roles ("*", "0",
 * "StringLiteral", "NumericLiteral", "NullLiteral", "BooleanLiteral",
 * "HexIntegerLiteral") carry all of their roles in a single merged entry.
 */
var LEXICAL_GRAMMAR = {
  "SingleLineComment": "Comment",
  "MultiLineComment": "Comment",
  "/*": {
    "MultiLineCommentChars": {
      "*/": "MultiLineComment"
    },
    "*/": "MultiLineComment"
  },
  "MultiLineNotAsteriskChar": {
    "MultiLineCommentChars": "MultiLineCommentChars"
  },
  // "*" is both a punctuator and the start of the comment-character rules.
  "*": {
    "_end": ["MultiLineCommentChars", "PostAsteriskCommentChars", "Punctuator"],
    "PostAsteriskCommentChars": ["MultiLineCommentChars", "PostAsteriskCommentChars"]
  },
  "MultiLineNotForwardSlashOrAsteriskChar": {
    "_end": "PostAsteriskCommentChars",
    "MultiLineCommentChars": "PostAsteriskCommentChars"
  },
  "SourceCharacter": {
    "_not": {
      "*": ["MultiLineNotAsteriskChar", "MultiLineNotForwardSlashOrAsteriskChar"],
      "/": "MultiLineNotForwardSlashOrAsteriskChar",
      "LineTerminator": "SingleLineCommentChar"
    }
  },
  "//": {
    "_end": "SingleLineComment",
    "SingleLineCommentChars": "SingleLineComment"
  },
  "SingleLineCommentChar": {
    "_end": "SingleLineCommentChars",
    "SingleLineCommentChars": "SingleLineCommentChars"
  },

  // Tokens. String and numeric literals are both a Token and a Literal.
  "StringLiteral": ["Token", "Literal"],
  "Punctuator": "Token",
  "NumericLiteral": ["Token", "Literal"],
  "Identifier": "Token",

  // Identifiers.
  "IdentifierName": {
    "_not": {
      "ReservedWord": "Identifier"
    },
    "IdentifierPart": "IdentifierName"
  },
  "IdentifierStart": ["IdentifierName", "IdentifierPart"],
  "UnicodeLetter": "IdentifierStart",
  "$": "IdentifierStart",
  "_": "IdentifierStart",
  "\\": {
    "UnicodeEscapeSequence": "IdentifierStart"
  },
  "UnicodeCombiningMark": "IdentifierPart",
  "UnicodeDigit": "IdentifierPart",
  "UnicodeConnectorPunctuation": "IdentifierPart",
  "<ZWNJ>": "IdentifierPart",
  "<ZWJ>": "IdentifierPart",
  "_Ll": "UnicodeLetter",
  "_Lu": "UnicodeLetter",
  "_Lt": "UnicodeLetter",
  "_Lm": "UnicodeLetter",
  "_Lo": "UnicodeLetter",
  "_Nl": "UnicodeLetter",
  "_Mn": "UnicodeCombiningMark",
  "_Mc": "UnicodeCombiningMark",
  "_Nd": "UnicodeDigit",
  "_Pc": "UnicodeConnectorPunctuation",

  // Reserved words. null/true/false literals are reserved words and literals.
  "Keyword": "ReservedWord",
  "FutureReservedWord": "ReservedWord",
  "NullLiteral": ["ReservedWord", "Literal"],
  "BooleanLiteral": ["ReservedWord", "Literal"],
  "break": "Keyword",
  "case": "Keyword",
  "catch": "Keyword",
  "continue": "Keyword",
  "debugger": "Keyword",
  "default": "Keyword",
  "delete": "Keyword",
  "do": "Keyword",
  "else": "Keyword",
  "finally": "Keyword",
  "for": "Keyword",
  "function": "Keyword",
  "if": "Keyword",
  "in": "Keyword",
  "instanceof": "Keyword",
  "new": "Keyword",
  "return": "Keyword",
  "switch": "Keyword",
  "this": "Keyword",
  "throw": "Keyword",
  "try": "Keyword",
  "typeof": "Keyword",
  "var": "Keyword",
  "void": "Keyword",
  "while": "Keyword",
  "with": "Keyword",
  "class": "FutureReservedWord",
  "const": "FutureReservedWord",
  "enum": "FutureReservedWord",
  "export": "FutureReservedWord",
  "extends": "FutureReservedWord",
  "import": "FutureReservedWord",
  "super": "FutureReservedWord",

  // Punctuators.
  "{": "Punctuator",
  "}": "Punctuator",
  "(": "Punctuator",
  ")": "Punctuator",
  "[": "Punctuator",
  "]": "Punctuator",
  ".": {
    "_end": "Punctuator",
    "DecimalDigits": {
      "_end": "DecimalLiteral",
      "ExponentPart": "DecimalLiteral"
    }
  },
  ";": "Punctuator",
  ",": "Punctuator",
  "<": "Punctuator",
  ">": "Punctuator",
  "<=": "Punctuator",
  ">=": "Punctuator",
  "==": "Punctuator",
  "!=": "Punctuator",
  "===": "Punctuator",
  "!==": "Punctuator",
  "+": {
    "_end": "Punctuator",
    "DecimalDigits": "SignedInteger"
  },
  "-": {
    "_end": "Punctuator",
    "DecimalDigits": "SignedInteger"
  },
  "%": "Punctuator",
  "++": "Punctuator",
  "--": "Punctuator",
  "<<": "Punctuator",
  ">>": "Punctuator",
  ">>>": "Punctuator",
  "&": "Punctuator",
  "|": "Punctuator",
  "^": "Punctuator",
  "!": "Punctuator",
  "~": "Punctuator",
  "&&": "Punctuator",
  "||": "Punctuator",
  "?": "Punctuator",
  ":": "Punctuator",
  "=": "Punctuator",
  "+=": "Punctuator",
  "-=": "Punctuator",
  "*=": "Punctuator",
  "%=": "Punctuator",
  "<<=": "Punctuator",
  ">>=": "Punctuator",
  ">>>=": "Punctuator",
  "&=": "Punctuator",
  "|=": "Punctuator",
  "^=": "Punctuator",
  "/": {
    "_end": "DivPunctuator",
    "=": "DivPunctuator"
  },

  // Literals.
  "RegularExpressionLiteral": "Literal",
  "null": "NullLiteral",
  "true": "BooleanLiteral",
  "false": "BooleanLiteral",
  "DecimalLiteral": "NumericLiteral",
  // A hex literal is a numeric literal and may be extended by more digits.
  "HexIntegerLiteral": {
    "_end": "NumericLiteral",
    "HexDigit": "HexIntegerLiteral"
  },
  "DecimalIntegerLiteral": {
    "_end": "DecimalLiteral",
    "ExponentPart": "DecimalLiteral",
    ".": {
      "_end": "DecimalLiteral",
      "DecimalDigits": {
        "_end": "DecimalLiteral",
        "ExponentPart": "DecimalLiteral"
      }
    }
  },
  // "0" is a digit on its own and also opens the 0x / 0X hex prefix.
  "0": {
    "_end": ["DecimalIntegerLiteral", "DecimalDigit", "HexDigit"],
    "x": {
      "HexDigit": "HexIntegerLiteral"
    },
    "X": {
      "HexDigit": "HexIntegerLiteral"
    }
  },
  "NonZeroDigit": {
    "_end": "DecimalIntegerLiteral",
    "DecimalDigits": "DecimalIntegerLiteral"
  },
  "1": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
  "2": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
  "3": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
  "4": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
  "5": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
  "6": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
  "7": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
  "8": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
  "9": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
  "ExponentIndicator": {
    "SignedInteger": "ExponentPart"
  },
  "e": ["ExponentIndicator", "HexDigit"],
  "E": ["ExponentIndicator", "HexDigit"],
  "DecimalDigits": "SignedInteger",
  "a": "HexDigit",
  "b": "HexDigit",
  "c": "HexDigit",
  "d": "HexDigit",
  "f": "HexDigit",
  "A": "HexDigit",
  "B": "HexDigit",
  "C": "HexDigit",
  "D": "HexDigit",
  "F": "HexDigit"
};
/*
TODO: separate out the numeric string grammar
*/
/**
 * Walks a grammar table one symbol at a time.
 *
 * State kept between calls:
 *   _grammar - the root table passed to the constructor;
 *   _rule    - the table the next symbol is looked up in (root or a nested one);
 *   _resPath - every symbol/reduction recorded so far;
 *   _not     - true only while a "_not" exclusion table is being evaluated.
 *
 * @param {Object} grammar Root grammar table.
 */
function GrammarWalker(grammar) {
  this._grammar = this._rule = grammar;
  this._resPath = [];
  this._not = false;
}

/**
 * Feeds one symbol to the walker.
 *
 * @param {string} symbol Symbol to look up in the current rule.
 * @returns {(boolean|Array|{path: Array})}
 *   `false` when the symbol was consumed without producing a result;
 *   `{ path: [...] }` (a snapshot of the recorded path) when the lookup
 *   bottoms out at the root table;
 *   for a "_not" table, the array of values of every entry other than
 *   `symbol`, or `true` when there is no such entry.
 */
GrammarWalker.prototype.gen = function(symbol) {
  // Own-property test: `in` would also match inherited members such as
  // "constructor" or "toString".
  var has = Object.prototype.hasOwnProperty;
  var target;

  if (this._not) {
    var excluded = this._rule;
    var names = Object.keys(excluded);
    var targets = [];
    for (var i = 0; i < names.length; i++) {
      if (names[i] !== symbol) {
        targets.push(excluded[names[i]]);
      }
    }
    // The exclusion lookup is one-shot: go back to normal matching at the
    // root, otherwise every later call would be answered from this table.
    this._not = false;
    this._rule = this._grammar;
    return targets.length > 0 ? targets : true;
  }

  if (has.call(this._rule, symbol)) {
    target = this._rule[symbol];
    if (typeof target === "string") {
      // Reduction: record it and continue from the root with the new symbol.
      this._resPath.push(target);
      this._rule = this._grammar;
      var reduced = this.gen(target);
      this._rule = this._grammar;
      return reduced;
    }
    // Continuation table: descend and wait for the next symbol.
    this._rule = target;
    this._resPath.push(symbol);
    return false;
  }

  if (this._rule !== this._grammar) {
    // The symbol does not continue the nested rule: retry it at the root.
    // NOTE(review): the retry's result is dropped and the rule is reset
    // afterwards, and this branch returns before the "_end"/"_not" checks,
    // so those entries are only ever consulted on the root table.
    this._rule = this._grammar;
    this.gen(symbol);
    this._rule = this._grammar;
    return false;
  }

  if (has.call(this._rule, "_end")) {
    target = this._rule["_end"];
    this._resPath.push(target);
    this._rule = this._grammar;
    return this.gen(target);
  }

  if (has.call(this._rule, "_not")) {
    this._not = !this._not;
    this._rule = this._rule["_not"];
    return this.gen(symbol);
  }

  // Return a copy so results handed out earlier are not changed by later
  // pushes. NOTE(review): the path itself keeps accumulating across results.
  return { path: this._resPath.slice() };
};
/**
 * Character-at-a-time lexer that drives a GrammarWalker over a source string.
 *
 * @param {string} source Text to scan.
 * @param {Object} [grammar] Grammar table handed to the GrammarWalker;
 *   defaults to LEXICAL_GRAMMAR when omitted or null.
 */
function Lexer(source, grammar) {
  this._source = source;
  this._position = 0;
  this._gw = new GrammarWalker(grammar == null ? LEXICAL_GRAMMAR : grammar);
  this._tokens = [];
}

/**
 * Feeds the character at the current position to the grammar walker and
 * advances by one. Any truthy result from the walker is appended to `_tokens`.
 *
 * @returns {boolean} `false` once the end of the source has been reached
 *   (nothing is consumed in that case), `true` otherwise.
 */
Lexer.prototype.next = function() {
  var ch = this._source.charAt(this._position);
  // charAt yields "" for any index past the end of the string.
  if (ch === "") {
    return false;
  }

  var res = this._gw.gen(ch);
  if (res) {
    this._tokens.push(res);
  }
  this._position++;
  return true;
};
// Demo: run the lexer over a short hexadecimal literal and print what it collected.
var lexer = new Lexer("0xab");
for (; lexer.next(); ) {
  // next() does all the work; keep calling it until it reports end of input.
}
console.log(lexer._tokens);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment