PanJarda · April 12, 2019 16:16
diff --git a/lexer2.js b/lexer2.js
 "use strict"

 /**
  * Lexical grammar table handed to GrammarWalker (see `new GrammarWalker(LEXICAL_GRAMMAR)`
  * in Lexer).
  *
  * Shape, as consumed by GrammarWalker.prototype.gen:
  *   - key            a symbol (an input character or a rule name)
  *   - string value   the symbol that the key is rewritten to
  *   - object value   a nested table of the symbols that may follow the key
  *   - "_end"/"_not"  special keys that gen() looks up by name
  *
  * NOTE(review): several values are arrays; gen() only special-cases string
  * values, so arrays are handled like nested tables — confirm this is intended.
  * NOTE(review): some keys occur more than once in this literal. In a JavaScript
  * object literal the last definition of a key wins, so the earlier ones are
  * silently discarded; each occurrence is flagged below.
  */
 var LEXICAL_GRAMMAR = {
 	"SingleLineComment": "Comment",
 	"MultiLineComment": "Comment",
 	"/*": {
 		// NOTE(review): spelled "Multiline..." here but "MultiLine..." everywhere else — possibly a typo.
 		"MultilineCommentChars": {
 			"*/": "MultiLineComment"
 		},
 		"*/": "MultiLineComment"
 	},
 	"MultiLineNotAsteriskChar": {
 		"MultiLineCommentChars": "MultiLineCommentChars",
 	},
 	// NOTE(review): duplicate key — overwritten by the later `"*": "Punctuator"` entry.
 	"*": {
 		"_end": ["MultiLineCommentChars", "PostAsteriskCommentChars"],
 		// NOTE(review): key lacks the "k" of "PostAsteriskCommentChars" — possibly a typo.
 		"PostAsterisCommentChars": ["MultiLineCommentChars", "PostAsteriskCommentChars"]
 	},
 	"MultiLineNotForwardSlashOrAsteriskChar": {
 		"_end": "PostAsteriskCommentChars",
 		"MultiLineCommentChars": "PostAsteriskCommentChars",
 	},
 	"SourceCharacter": {
 		"_not": {
 			"*": ["MultiLineNotAsteriskChar", "MultiLineNotForwardSlashOrAsteriskChar"],
 			"/": "MultiLineNotForwardSlashOrAsteriskChar",
 			"LineTerminator": "SingleLineCommentChar"
 		}
 	},
 	"//": {
 		"_end": "SingleLineComment",
 		"SingleLineCommentChars": "SingleLineComment"
 	},
 	"SingleLineCommentChar": {
 		"_end": "SingleLineCommentChars",
 		"SingleLineCommentChars": "SingleLineCommentChars"
 	},
 	// NOTE(review): duplicate key — overwritten by `"StringLiteral": "Literal"` below.
 	"StringLiteral": "Token",
 	"Punctuator": "Token",
 	// NOTE(review): the rest of the table says "NumericLiteral" — possibly a typo.
 	"NumericalLiteral": "Token",
 	"Identifier": "Token",
 	"IdentifierName": {
 		"_not": {
 			"ReservedWord": "Identifier"
 		},
 		"IdentifierPart": "IdentifierName"
 	},
 	"IdentifierStart": ["IdentifierName","IdentifierPart"],
 	"UnicodeLetter": "IdentifierStart",
 	"$": "IdentifierStart",
 	"_": "IdentifierStart",
 	"\\": {
 		"UnicodeEscapeSequence": "IdentifierStart"
 	},
 	"UnicodeCombiningMark": "IdentifierPart",
 	"UnicodeDigit": "IdentifierPart",
 	"UnicodeConnectorPunctuation": "IdentifierPart",
 	"<ZWNJ>": "IdentifierPart",
 	"<ZWJ>": "IdentifierPart",
 	"_Ll": "UnicodeLetter",
 	"_Lu": "UnicodeLetter",
 	"_Lt": "UnicodeLetter",
 	"_Lm": "UnicodeLetter",
 	"_Lo": "UnicodeLetter",
 	"_Nl": "UnicodeLetter",
 	"_Mn": "UnicodeCombiningMark",
 	"_Mc": "UnicodeCombiningMark",
 	"_Nd": "UnicodeDigit",
 	"_Pc": "UnicodeConnectorPunctuation",
 	"Keyword": "ReservedWord",
 	"FutureReservedWord": "ReservedWord",
 	// NOTE(review): the next two keys are duplicated — overwritten by the "Literal" entries below.
 	"NullLiteral": "ReservedWord",
 	"BooleanLiteral": "ReservedWord",
 	"break": "Keyword",
 	"case": "Keyword",
 	"catch": "Keyword",
 	"continue": "Keyword",
 	"debugger": "Keyword",
 	"default": "Keyword",
 	"delete": "Keyword",
 	"do": "Keyword",
 	"else": "Keyword",
 	"finally": "Keyword",
 	"for": "Keyword",
 	"function": "Keyword",
 	"if": "Keyword",
 	"in": "Keyword",
 	"instanceof": "Keyword",
 	"new": "Keyword",
 	"return": "Keyword",
 	"switch": "Keyword",
 	"this": "Keyword",
 	"throw": "Keyword",
 	"try": "Keyword",
 	"typeof": "Keyword",
 	"var": "Keyword",
 	"void": "Keyword",
 	"while": "Keyword",
 	"with": "Keyword",
 	"class": "FutureReservedWord",
 	"const": "FutureReservedWord",
 	"enum": "FutureReservedWord",
 	"export": "FutureReservedWord",
 	"extends": "FutureReservedWord",
 	"import": "FutureReservedWord",
 	"super": "FutureReservedWord",
 	"{": "Punctuator",
 	"}": "Punctuator",
 	"(": "Punctuator",
 	")": "Punctuator",
 	"[": "Punctuator",
 	"]": "Punctuator",
 	".": {
 		"_end": "Punctuator",
 		"DecimalDigits": {
 			"_end": "DecimalLiteral",
 			"ExponentPart": "DecimalLiteral"
 		}
 	},
 	";": "Punctuator",
 	",": "Punctuator",
 	"<": "Punctuator",
 	">": "Punctuator",
 	"<=": "Punctuator",
 	">=": "Punctuator",
 	"==": "Punctuator",
 	"!=": "Punctuator",
 	"===": "Punctuator",
 	"!==": "Punctuator",
 	"+": {
 		"_end": "Punctuator",
 		"DecimalDigits": "SignedInteger",
 	},
 	"-": {
 		"_end": "Punctuator",
 		"DecimalDigits": "SignedInteger",
 	},
 	// NOTE(review): duplicate key — this definition replaces the comment-related `"*"` table above.
 	"*": "Punctuator",
 	"%": "Punctuator",
 	"++": "Punctuator",
 	"--": "Punctuator",
 	"<<": "Punctuator",
 	">>": "Punctuator",
 	">>>": "Punctuator",
 	"&": "Punctuator",
 	"|": "Punctuator",
 	"^": "Punctuator",
 	"!": "Punctuator",
 	"~": "Punctuator",
 	"&&": "Punctuator",
 	"||": "Punctuator",
 	"?": "Punctuator",
 	":": "Punctuator",
 	"=": "Punctuator",
 	"+=": "Punctuator",
 	"-=": "Punctuator",
 	"*=": "Punctuator",
 	"%=": "Punctuator",
 	"<<=": "Punctuator",
 	">>=": "Punctuator",
 	">>>=": "Punctuator",
 	"&=": "Punctuator",
 	"|=": "Punctuator",
 	"^=": "Punctuator",
 	"/": {
 		"_end": "DivPunctuator",
 		"=": "DivPunctuator"
 	},
 	// NOTE(review): "NullLiteral", "BooleanLiteral" and "StringLiteral" below are the
 	// definitions that take effect; they replace the earlier entries with the same keys.
 	"NullLiteral": "Literal",
 	"BooleanLiteral": "Literal",
 	"NumericLiteral": "Literal",
 	"StringLiteral": "Literal",
 	"RegularExpressionLiteral": "Literal",
 	"null": "NullLiteral",
 	"true": "BooleanLiteral",
 	"false": "BooleanLiteral",
 	"DecimalLiteral": "NumericLiteral",
 	"HexIntegerLiteral": "NumericLiteral",
 	"DecimalIntegerLiteral": {
 		"_end": "DecimalLiteral",
 		"ExponentPart": "DecimalLiteral",
 		".": {
 			"_end": "DecimalLiteral",
 			"DecimalDigits": {
 				"_end": "DecimalLiteral",
 				"ExponentPart": "DecimalLiteral"
 			}
 		}
 	},
 	// NOTE(review): duplicate key — overwritten by the `"0": { "x": ... }` table below.
 	"0": ["DecimalIntegerLiteral", "DecimalDigit", "HexDigit"],
 	"NonZeroDigit": {
 		"_end": "DecimalIntegerLiteral",
 		"DecimalDigits": "DecimalIntegerLiteral"
 	},
 	"1": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
 	"2": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
 	"3": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
 	"4": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
 	"5": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
 	"6": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
 	"7": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
 	"8": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
 	"9": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
 	"ExponentIndicator": {
 		"SignedInteger": "ExponentPart",
 	},
 	"e": ["ExponentIndicator", "HexDigit"],
 	"E": ["ExponentIndicator", "HexDigit"],
 	"DecimalDigits": "SignedInteger",
 	// NOTE(review): duplicate key — this is the `"0"` definition that takes effect.
 	"0": {
 		"x": {
 			"HexDigit": "HexIntegerLiteral",
 		}
 	},
 	"0X": {
 		"HexDigit": "HexIntegerLiteral",
 	},
 	"HexIntegerLiteral": {
 		"HexDigit": "HexIntegerLiteral"
 	},
 	"a": "HexDigit",
 	"b": "HexDigit",
 	"c": "HexDigit",
 	"d": "HexDigit",
 	"f": "HexDigit",
 	"A": "HexDigit",
 	"B": "HexDigit",
 	"C": "HexDigit",
 	"D": "HexDigit",
 	"F": "HexDigit",
 };

 /*

 TODO: separate out the numeric string grammar

 */

 /**
  * Stateful walker over a nested grammar table.
  *
  * @constructor
  * @param {Object} grammar Table whose keys are symbols. A string value names
  *     the symbol the key is rewritten to; any other value is treated as a
  *     nested table of follow-up symbols. The keys "_end" and "_not" are
  *     looked up by name in gen().
  */
 function GrammarWalker(grammar) {
 	// _grammar is the root table, _rule the table currently being matched.
 	this._grammar = this._rule = grammar;
 	// Symbols recorded so far; gen() only ever appends to this array.
 	this._resPath = [];
 	// True only while a "_not" table is being evaluated.
 	this._not = false;
 }

 /**
  * Feeds one symbol to the walker.
  *
  * @param {string} symbol Input character or rule name.
  * @return {(boolean|Array|{path: Array})}
  *     - `false` when the symbol was consumed and no result is produced yet;
  *     - `{ path: ... }` when the symbol is not a key of the root table and the
  *       root table has neither "_end" nor "_not"; `path` is the walker's own
  *       accumulated array (the same object on every call, never cleared);
  *     - in negation mode, the values of every "_not" entry whose key differs
  *       from `symbol`, or `true` when there is none.
  */
 GrammarWalker.prototype.gen = function(symbol) {
 	// Own-property checks only: the `in` operator would also match members
 	// inherited from Object.prototype (e.g. "toString", "constructor").
 	var hasOwn = Object.prototype.hasOwnProperty,
 		names,
 		next,
 		res,
 		i;

 	if (this._not) {
 		names = Object.keys(this._rule);
 		res = [];
 		// Index the key list explicitly: a for...in loop over the array would
 		// yield "0", "1", ... and look those up in the rule table instead.
 		for (i = 0; i < names.length; i++) {
 			if (names[i] !== symbol) {
 				res.push(this._rule[names[i]]);
 			}
 		}

 		// Leave negation mode so that later calls match normally again.
 		this._not = false;
 		this._rule = this._grammar;
 		return res.length > 0 ? res : true;
 	}

 	if (hasOwn.call(this._rule, symbol)) {
 		next = this._rule[symbol];

 		if (typeof next === "string") {
 			// Rewrite: record the target symbol and restart from the root.
 			this._resPath.push(next);
 			this._rule = this._grammar;
 			res = this.gen(next);
 			this._rule = this._grammar;
 			return res;
 		}

 		// Nested table: descend and wait for the next symbol.
 		// NOTE(review): array values also take this path — confirm intended.
 		this._rule = next;
 		this._resPath.push(symbol);
 		return false;
 	}

 	if (this._rule !== this._grammar) {
 		// The symbol does not continue the current nested rule: retry it
 		// against the root table. Only the side effects of that attempt are
 		// kept; its return value is deliberately not propagated (unchanged
 		// from the previous behaviour).
 		this._rule = this._grammar;
 		this.gen(symbol);
 		this._rule = this._grammar;
 		return false;
 	}

 	// From here on this._rule is the root table.
 	if (hasOwn.call(this._rule, "_end")) {
 		next = this._rule["_end"];
 		this._resPath.push(next);
 		this._rule = this._grammar;
 		return this.gen(next);
 	}

 	if (hasOwn.call(this._rule, "_not")) {
 		// _not is always false at this point, so this switches negation on.
 		this._not = true;
 		this._rule = this._rule["_not"];
 		return this.gen(symbol);
 	}

 	return { path: this._resPath };
 };

 /**
  * Character-by-character lexer driven by a GrammarWalker over LEXICAL_GRAMMAR.
  *
  * @constructor
  * @param {string} source Text to tokenize.
  */
 function Lexer(source) {
 	this._source = source;
 	this._position = 0;
 	this._gw = new GrammarWalker(LEXICAL_GRAMMAR);
 	this._tokens = [];
 }

 /**
  * Consumes the character at the current position and hands it to the walker.
  * Any truthy result from the walker is appended to `_tokens`.
  *
  * @return {boolean} `false` once the end of the source is reached (nothing is
  *     consumed in that case), `true` otherwise.
  */
 Lexer.prototype.next = function() {
 	var current = this._source.charAt(this._position);

 	// charAt() yields "" for an out-of-range index, i.e. end of input.
 	if (current === "") {
 		return false;
 	}

 	var produced = this._gw.gen(current);
 	if (produced) {
 		this._tokens.push(produced);
 	}

 	this._position += 1;
 	return true;
 };

 // Demo run: tokenize a small hexadecimal literal and print what was collected.
 var lexer = new Lexer("0xab");

 // Keep stepping until next() reports the end of the input.
 do {
 	// all work happens inside next()
 } while (lexer.next());

 console.log(lexer._tokens);
	"use strict"

	/**
	 * Lexical grammar table handed to GrammarWalker (see `new GrammarWalker(LEXICAL_GRAMMAR)`
	 * in Lexer).
	 *
	 * Shape, as consumed by GrammarWalker.prototype.gen:
	 *   - key            a symbol (an input character or a rule name)
	 *   - string value   the symbol that the key is rewritten to
	 *   - object value   a nested table of the symbols that may follow the key
	 *   - "_end"/"_not"  special keys that gen() looks up by name
	 *
	 * NOTE(review): several values are arrays; gen() only special-cases string
	 * values, so arrays are handled like nested tables — confirm this is intended.
	 * NOTE(review): some keys occur more than once in this literal. In a JavaScript
	 * object literal the last definition of a key wins, so the earlier ones are
	 * silently discarded; each occurrence is flagged below.
	 */
	var LEXICAL_GRAMMAR = {
	"SingleLineComment": "Comment",
	"MultiLineComment": "Comment",
	"/*": {
	// NOTE(review): spelled "Multiline..." here but "MultiLine..." everywhere else — possibly a typo.
	"MultilineCommentChars": {
	"*/": "MultiLineComment"
	},
	"*/": "MultiLineComment"
	},
	"MultiLineNotAsteriskChar": {
	"MultiLineCommentChars": "MultiLineCommentChars",
	},
	// NOTE(review): duplicate key — overwritten by the later `"*": "Punctuator"` entry.
	"*": {
	"_end": ["MultiLineCommentChars", "PostAsteriskCommentChars"],
	// NOTE(review): key lacks the "k" of "PostAsteriskCommentChars" — possibly a typo.
	"PostAsterisCommentChars": ["MultiLineCommentChars", "PostAsteriskCommentChars"]
	},
	"MultiLineNotForwardSlashOrAsteriskChar": {
	"_end": "PostAsteriskCommentChars",
	"MultiLineCommentChars": "PostAsteriskCommentChars",
	},
	"SourceCharacter": {
	"_not": {
	"*": ["MultiLineNotAsteriskChar", "MultiLineNotForwardSlashOrAsteriskChar"],
	"/": "MultiLineNotForwardSlashOrAsteriskChar",
	"LineTerminator": "SingleLineCommentChar"
	}
	},
	"//": {
	"_end": "SingleLineComment",
	"SingleLineCommentChars": "SingleLineComment"
	},
	"SingleLineCommentChar": {
	"_end": "SingleLineCommentChars",
	"SingleLineCommentChars": "SingleLineCommentChars"
	},
	// NOTE(review): duplicate key — overwritten by `"StringLiteral": "Literal"` below.
	"StringLiteral": "Token",
	"Punctuator": "Token",
	// NOTE(review): the rest of the table says "NumericLiteral" — possibly a typo.
	"NumericalLiteral": "Token",
	"Identifier": "Token",
	"IdentifierName": {
	"_not": {
	"ReservedWord": "Identifier"
	},
	"IdentifierPart": "IdentifierName"
	},
	"IdentifierStart": ["IdentifierName","IdentifierPart"],
	"UnicodeLetter": "IdentifierStart",
	"$": "IdentifierStart",
	"_": "IdentifierStart",
	"\\": {
	"UnicodeEscapeSequence": "IdentifierStart"
	},
	"UnicodeCombiningMark": "IdentifierPart",
	"UnicodeDigit": "IdentifierPart",
	"UnicodeConnectorPunctuation": "IdentifierPart",
	"<ZWNJ>": "IdentifierPart",
	"<ZWJ>": "IdentifierPart",
	"_Ll": "UnicodeLetter",
	"_Lu": "UnicodeLetter",
	"_Lt": "UnicodeLetter",
	"_Lm": "UnicodeLetter",
	"_Lo": "UnicodeLetter",
	"_Nl": "UnicodeLetter",
	"_Mn": "UnicodeCombiningMark",
	"_Mc": "UnicodeCombiningMark",
	"_Nd": "UnicodeDigit",
	"_Pc": "UnicodeConnectorPunctuation",
	"Keyword": "ReservedWord",
	"FutureReservedWord": "ReservedWord",
	// NOTE(review): the next two keys are duplicated — overwritten by the "Literal" entries below.
	"NullLiteral": "ReservedWord",
	"BooleanLiteral": "ReservedWord",
	"break": "Keyword",
	"case": "Keyword",
	"catch": "Keyword",
	"continue": "Keyword",
	"debugger": "Keyword",
	"default": "Keyword",
	"delete": "Keyword",
	"do": "Keyword",
	"else": "Keyword",
	"finally": "Keyword",
	"for": "Keyword",
	"function": "Keyword",
	"if": "Keyword",
	"in": "Keyword",
	"instanceof": "Keyword",
	"new": "Keyword",
	"return": "Keyword",
	"switch": "Keyword",
	"this": "Keyword",
	"throw": "Keyword",
	"try": "Keyword",
	"typeof": "Keyword",
	"var": "Keyword",
	"void": "Keyword",
	"while": "Keyword",
	"with": "Keyword",
	"class": "FutureReservedWord",
	"const": "FutureReservedWord",
	"enum": "FutureReservedWord",
	"export": "FutureReservedWord",
	"extends": "FutureReservedWord",
	"import": "FutureReservedWord",
	"super": "FutureReservedWord",
	"{": "Punctuator",
	"}": "Punctuator",
	"(": "Punctuator",
	")": "Punctuator",
	"[": "Punctuator",
	"]": "Punctuator",
	".": {
	"_end": "Punctuator",
	"DecimalDigits": {
	"_end": "DecimalLiteral",
	"ExponentPart": "DecimalLiteral"
	}
	},
	";": "Punctuator",
	",": "Punctuator",
	"<": "Punctuator",
	">": "Punctuator",
	"<=": "Punctuator",
	">=": "Punctuator",
	"==": "Punctuator",
	"!=": "Punctuator",
	"===": "Punctuator",
	"!==": "Punctuator",
	"+": {
	"_end": "Punctuator",
	"DecimalDigits": "SignedInteger",
	},
	"-": {
	"_end": "Punctuator",
	"DecimalDigits": "SignedInteger",
	},
	// NOTE(review): duplicate key — this definition replaces the comment-related `"*"` table above.
	"*": "Punctuator",
	"%": "Punctuator",
	"++": "Punctuator",
	"--": "Punctuator",
	"<<": "Punctuator",
	">>": "Punctuator",
	">>>": "Punctuator",
	"&": "Punctuator",
	"\|": "Punctuator",
	"^": "Punctuator",
	"!": "Punctuator",
	"~": "Punctuator",
	"&&": "Punctuator",
	"\|\|": "Punctuator",
	"?": "Punctuator",
	":": "Punctuator",
	"=": "Punctuator",
	"+=": "Punctuator",
	"-=": "Punctuator",
	"*=": "Punctuator",
	"%=": "Punctuator",
	"<<=": "Punctuator",
	">>=": "Punctuator",
	">>>=": "Punctuator",
	"&=": "Punctuator",
	"\|=": "Punctuator",
	"^=": "Punctuator",
	"/": {
	"_end": "DivPunctuator",
	"=": "DivPunctuator"
	},
	// NOTE(review): "NullLiteral", "BooleanLiteral" and "StringLiteral" below are the
	// definitions that take effect; they replace the earlier entries with the same keys.
	"NullLiteral": "Literal",
	"BooleanLiteral": "Literal",
	"NumericLiteral": "Literal",
	"StringLiteral": "Literal",
	"RegularExpressionLiteral": "Literal",
	"null": "NullLiteral",
	"true": "BooleanLiteral",
	"false": "BooleanLiteral",
	"DecimalLiteral": "NumericLiteral",
	"HexIntegerLiteral": "NumericLiteral",
	"DecimalIntegerLiteral": {
	"_end": "DecimalLiteral",
	"ExponentPart": "DecimalLiteral",
	".": {
	"_end": "DecimalLiteral",
	"DecimalDigits": {
	"_end": "DecimalLiteral",
	"ExponentPart": "DecimalLiteral"
	}
	}
	},
	// NOTE(review): duplicate key — overwritten by the `"0": { "x": ... }` table below.
	"0": ["DecimalIntegerLiteral", "DecimalDigit", "HexDigit"],
	"NonZeroDigit": {
	"_end": "DecimalIntegerLiteral",
	"DecimalDigits": "DecimalIntegerLiteral"
	},
	"1": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
	"2": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
	"3": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
	"4": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
	"5": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
	"6": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
	"7": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
	"8": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
	"9": ["DecimalDigit", "NonZeroDigit", "HexDigit"],
	"ExponentIndicator": {
	"SignedInteger": "ExponentPart",
	},
	"e": ["ExponentIndicator", "HexDigit"],
	"E": ["ExponentIndicator", "HexDigit"],
	"DecimalDigits": "SignedInteger",
	// NOTE(review): duplicate key — this is the `"0"` definition that takes effect.
	"0": {
	"x": {
	"HexDigit": "HexIntegerLiteral",
	}
	},
	"0X": {
	"HexDigit": "HexIntegerLiteral",
	},
	"HexIntegerLiteral": {
	"HexDigit": "HexIntegerLiteral"
	},
	"a": "HexDigit",
	"b": "HexDigit",
	"c": "HexDigit",
	"d": "HexDigit",
	"f": "HexDigit",
	"A": "HexDigit",
	"B": "HexDigit",
	"C": "HexDigit",
	"D": "HexDigit",
	"F": "HexDigit",
	};

	/*

	TODO: separate out the numeric string grammar

	*/

	/**
	 * Stateful walker over a nested grammar table.
	 *
	 * @constructor
	 * @param {Object} grammar Table whose keys are symbols. A string value names
	 *     the symbol the key is rewritten to; any other value is treated as a
	 *     nested table of follow-up symbols. The keys "_end" and "_not" are
	 *     looked up by name in gen().
	 */
	function GrammarWalker(grammar) {
		// _grammar is the root table, _rule the table currently being matched.
		this._grammar = this._rule = grammar;
		// Symbols recorded so far; gen() only ever appends to this array.
		this._resPath = [];
		// True only while a "_not" table is being evaluated.
		this._not = false;
	}

	/**
	 * Feeds one symbol to the walker.
	 *
	 * @param {string} symbol Input character or rule name.
	 * @return {(boolean|Array|{path: Array})}
	 *     - `false` when the symbol was consumed and no result is produced yet;
	 *     - `{ path: ... }` when the symbol is not a key of the root table and the
	 *       root table has neither "_end" nor "_not"; `path` is the walker's own
	 *       accumulated array (the same object on every call, never cleared);
	 *     - in negation mode, the values of every "_not" entry whose key differs
	 *       from `symbol`, or `true` when there is none.
	 */
	GrammarWalker.prototype.gen = function(symbol) {
		// Own-property checks only: the `in` operator would also match members
		// inherited from Object.prototype (e.g. "toString", "constructor").
		var hasOwn = Object.prototype.hasOwnProperty,
			names,
			next,
			res,
			i;

		if (this._not) {
			names = Object.keys(this._rule);
			res = [];
			// Index the key list explicitly: a for...in loop over the array would
			// yield "0", "1", ... and look those up in the rule table instead.
			for (i = 0; i < names.length; i++) {
				if (names[i] !== symbol) {
					res.push(this._rule[names[i]]);
				}
			}

			// Leave negation mode so that later calls match normally again.
			this._not = false;
			this._rule = this._grammar;
			return res.length > 0 ? res : true;
		}

		if (hasOwn.call(this._rule, symbol)) {
			next = this._rule[symbol];

			if (typeof next === "string") {
				// Rewrite: record the target symbol and restart from the root.
				this._resPath.push(next);
				this._rule = this._grammar;
				res = this.gen(next);
				this._rule = this._grammar;
				return res;
			}

			// Nested table: descend and wait for the next symbol.
			// NOTE(review): array values also take this path — confirm intended.
			this._rule = next;
			this._resPath.push(symbol);
			return false;
		}

		if (this._rule !== this._grammar) {
			// The symbol does not continue the current nested rule: retry it
			// against the root table. Only the side effects of that attempt are
			// kept; its return value is deliberately not propagated (unchanged
			// from the previous behaviour).
			this._rule = this._grammar;
			this.gen(symbol);
			this._rule = this._grammar;
			return false;
		}

		// From here on this._rule is the root table.
		if (hasOwn.call(this._rule, "_end")) {
			next = this._rule["_end"];
			this._resPath.push(next);
			this._rule = this._grammar;
			return this.gen(next);
		}

		if (hasOwn.call(this._rule, "_not")) {
			// _not is always false at this point, so this switches negation on.
			this._not = true;
			this._rule = this._rule["_not"];
			return this.gen(symbol);
		}

		return { path: this._resPath };
	};

	/**
	 * Character-by-character lexer driven by a GrammarWalker over LEXICAL_GRAMMAR.
	 *
	 * @constructor
	 * @param {string} source Text to tokenize.
	 */
	function Lexer(source) {
		this._source = source;
		this._position = 0;
		this._gw = new GrammarWalker(LEXICAL_GRAMMAR);
		this._tokens = [];
	}

	/**
	 * Consumes the character at the current position and hands it to the walker.
	 * Any truthy result from the walker is appended to `_tokens`.
	 *
	 * @return {boolean} `false` once the end of the source is reached (nothing is
	 *     consumed in that case), `true` otherwise.
	 */
	Lexer.prototype.next = function() {
		var current = this._source.charAt(this._position);

		// charAt() yields "" for an out-of-range index, i.e. end of input.
		if (current === "") {
			return false;
		}

		var produced = this._gw.gen(current);
		if (produced) {
			this._tokens.push(produced);
		}

		this._position += 1;
		return true;
	};

	// Demo run: tokenize a small hexadecimal literal and print what was collected.
	var lexer = new Lexer("0xab");

	// Keep stepping until next() reports the end of the input.
	do {
		// all work happens inside next()
	} while (lexer.next());

	console.log(lexer._tokens);