Last active
April 9, 2025 13:51
-
-
Save YairRand/e22aded969e6de8cbb283e62868153a1 to your computer and use it in GitHub Desktop.
Script to translate Lua code between English keywords and the equivalent keywords in other languages
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Keyword tables, keyed by language code. Each table maps the canonical
// (English) Lua keyword to the word used for it in that language, so `en` is
// the identity mapping. Both tables below define exactly the same keys.
var KW = {
  en: {
    'true': 'true',
    'false': 'false',
    'nil': 'nil',
    'do': 'do',
    'if': 'if',
    'in': 'in',
    'or': 'or',
    'and': 'and',
    'end': 'end',
    'for': 'for',
    'not': 'not',
    'else': 'else',
    'then': 'then',
    'break': 'break',
    'local': 'local',
    'until': 'until',
    'while': 'while',
    'elseif': 'elseif',
    'repeat': 'repeat',
    'return': 'return',
    'function': 'function'
  },
  // For testing: A "test language" where each keyword is reversed from English.
  test: {
    'true': 'eurt',
    'false': 'eslaf',
    'nil': 'lin',
    'do': 'od',
    'if': 'fi',
    'in': 'ni',
    'or': 'ro',
    'and': 'dna',
    'end': 'dne',
    'for': 'rof',
    'not': 'ton',
    'else': 'esle',
    'then': 'neht',
    'break': 'kaerb',
    'local': 'lacol',
    'until': 'litnu',
    'while': 'elihw',
    'elseif': 'fiesle',
    'repeat': 'taeper',
    'return': 'nruter',
    'function': 'noitcnuf'
  }
};
/**
 * Translates the keywords of a piece of Lua code between English and one of
 * the languages listed in `KW`. Identifiers that would collide with a keyword
 * of the target language get a leading underscore added; identifiers that
 * carry such an underscore in front of a source-language keyword have one
 * underscore removed again, so a round trip restores the original names.
 *
 * @param {String} code The Lua code to translate from one language to another.
 * @param {String} lang The language code of the language to translate to or from.
 * @param {Boolean} toEnglish Whether to translate from the selected language to
 *  English or from English to the selected language.
 * @return {String} The translated code.
 * @throws {Error} If `KW` has no keyword table for `lang`.
 * @throws {SyntaxError} Presumably whatever the lexer raises for code it
 *  cannot tokenize (its `raise()` helper throws SyntaxError instances).
 *
 * @example
 * translateLua( 'local foo = true', 'test', false );
 * // returns 'lacol foo = eurt', in the "test language" where each keyword is reversed.
 */
function translateLua( code, lang, toEnglish ) {
	if ( !Object.prototype.hasOwnProperty.call( KW, lang ) ) {
		throw new Error( 'Unknown language code: ' + lang );
	}

	var parseLang = toEnglish ? lang : 'en',
		luaparse = setupLuaparse( parseLang ),
		parser = luaparse.parse( code, { wait: true } ),
		tokenTypes = luaparse.tokenTypes,
		kwTypes = [ 'BooleanLiteral', 'Keyword', 'NilLiteral' ].map( key => tokenTypes[ key ] ),
		// Maps a keyword as written in the source language to the target language.
		kwMap = toEnglish ?
			Object.fromEntries( Object.entries( KW[ lang ] ).map( ( [ english, local ] ) => [ local, english ] ) ) :
			KW[ lang ],
		sourceLangKWs = Object.keys( kwMap ),
		targetLangKWs = Object.values( kwMap ),
		output = '',
		cursor = 0,
		changed = false,
		token, text, newValue;

	// Avoid naming conflicts with keywords. Returns the new name, or
	// undefined when the identifier can stay as it is.
	function processVar( name ) {
		var rName = name.replace( /^_+/, '' );

		if ( targetLangKWs.includes( rName ) ) {
			// A word that is a keyword in both languages cannot clash in a
			// new way, so only escape words that are keywords in the target alone.
			return sourceLangKWs.includes( rName ) ? undefined : '_' + name;
		}

		if ( rName !== name && sourceLangKWs.includes( rName ) ) {
			// Remove initial underscore.
			return name.slice( 1 );
		}

		return undefined;
	}

	while ( ( token = parser.lex() ).type !== tokenTypes.EOF ) {
		// Look replacements up by the token's source text, not by token.value:
		// the lexer casts booleans to true/false and nil to null, and it
		// re-encodes non-ASCII characters, so the value is not a usable key.
		text = code.slice( token.range[ 0 ], token.range[ 1 ] );
		newValue = undefined;

		if ( kwTypes.includes( token.type ) ) {
			if ( Object.prototype.hasOwnProperty.call( kwMap, text ) ) {
				newValue = kwMap[ text ];
			}
		} else if ( token.type === tokenTypes.Identifier ) {
			newValue = processVar( text );
		}

		if ( newValue !== undefined ) {
			// Copy the untouched text since the previous replacement, then the replacement.
			output += code.slice( cursor, token.range[ 0 ] ) + newValue;
			cursor = token.range[ 1 ];
			changed = true;
		}
	}

	// No keywords or variables, no changes.
	return changed ? output + code.slice( cursor ) : code;
}
// --------------------- | |
// The following code is from https://github.com/oxyc/luaparse/blob/master/luaparse.js , | |
// modified to use KW instead of hardcoded strings as tokens. | |
// Licensed under the MIT license: | |
// Copyright (c) Oskar Schöldström 2012-2014 | |
// | |
// Permission is hereby granted, free of charge, to any person obtaining | |
// a copy of this software and associated documentation files (the | |
// "Software"), to deal in the Software without restriction, including | |
// without limitation the rights to use, copy, modify, merge, publish, | |
// distribute, sublicense, and/or sell copies of the Software, and to | |
// permit persons to whom the Software is furnished to do so, subject to | |
// the following conditions: | |
// | |
// The above copyright notice and this permission notice shall be | |
// included in all copies or substantial portions of the Software. | |
// | |
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
// SOFTWARE. | |
var setupLuaparse = (function (factory) { | |
'use strict'; | |
return function ( lang ) { | |
var luaparse = {}; | |
factory( luaparse, KW[ lang ] ); | |
return luaparse; | |
}; | |
}(function (exports, LKW) { | |
'use strict'; | |
exports.version = '0.2.1'; | |
var input, options, length, features; | |
// Options can be set either globally on the parser object through
// defaultOptions, or during the parse call.
var defaultOptions = exports.defaultOptions = {
  // Explicitly tell the parser when the input ends.
  wait: false,
  // Store comments as an array in the chunk object.
  comments: true,
  // Track identifier scopes by adding an isLocal attribute to each
  // identifier-node.
  scope: false,
  // Store location information on each syntax node as
  // `loc: { start: { line, column }, end: { line, column } }`.
  locations: false,
  // Store the start and end character locations on each syntax node as
  // `range: [start, end]`.
  ranges: false,
  // A callback which will be invoked when a syntax node has been completed.
  // The node which has been created will be passed as the only parameter.
  onCreateNode: null,
  // A callback which will be invoked when a new scope is created.
  onCreateScope: null,
  // A callback which will be invoked when the current scope is destroyed.
  onDestroyScope: null,
  // A callback which will be invoked when a local variable is declared in
  // the current scope. The variable's name will be passed as the only
  // parameter.
  onLocalDeclaration: null,
  // The version of Lua targeted by the parser (string; allowed values are
  // '5.1', '5.2', '5.3').
  luaVersion: '5.1',
  // Whether to allow code points outside the Basic Latin block in identifiers.
  extendedIdentifiers: false
};
// The available tokens expressed as enum flags so they can be checked with | |
// bitwise operations. | |
var EOF = 1, StringLiteral = 2, Keyword = 4, Identifier = 8 | |
, NumericLiteral = 16, Punctuator = 32, BooleanLiteral = 64 | |
, NilLiteral = 128, VarargLiteral = 256; | |
exports.tokenTypes = { EOF: EOF, StringLiteral: StringLiteral | |
, Keyword: Keyword, Identifier: Identifier, NumericLiteral: NumericLiteral | |
, Punctuator: Punctuator, BooleanLiteral: BooleanLiteral | |
, NilLiteral: NilLiteral, VarargLiteral: VarargLiteral | |
}; | |
// As this parser is a bit different from Lua's own, the error messages
// will be different in some situations.
//
// Each entry is a format string for sprintf(): %1, %2, ... are replaced by
// the corresponding extra argument.
var errors = exports.errors = {
  unexpected: "unexpected %1 '%2' near '%3'",
  expected: "'%1' expected near '%2'",
  expectedToken: "%1 expected near '%2'",
  unfinishedString: "unfinished string near '%1'",
  malformedNumber: "malformed number near '%1'",
  invalidVar: "invalid left-hand side of assignment near '%1'",
  decimalEscapeTooLarge: "decimal escape too large near '%1'",
  invalidEscape: "invalid escape sequence near '%1'",
  hexadecimalDigitExpected: "hexadecimal digit expected near '%1'",
  braceExpected: "missing '%1' near '%2'",
  tooLargeCodepoint: "UTF-8 value too large near '%1'",
  unfinishedLongString: "unfinished long string (starting at line %1) near '%2'",
  unfinishedLongComment: "unfinished long comment (starting at line %1) near '%2'",
  ambiguousSyntax: "ambiguous syntax (function call x new statement) near '%1'"
};
// ### Abstract Syntax Tree
//
// The default AST structure is inspired by the Mozilla Parser API but can
// easily be customized by overriding these functions.
//
// Every function is a plain node constructor: it returns a fresh object whose
// `type` names the node kind and whose remaining properties are the arguments.
var ast = exports.ast = {
  labelStatement: function (label) {
    return { type: 'LabelStatement', label: label };
  },
  breakStatement: function () {
    return { type: 'BreakStatement' };
  },
  gotoStatement: function (label) {
    return { type: 'GotoStatement', label: label };
  },
  returnStatement: function (args) {
    return { type: 'ReturnStatement', 'arguments': args };
  },
  ifStatement: function (clauses) {
    return { type: 'IfStatement', clauses: clauses };
  },
  ifClause: function (condition, body) {
    return { type: 'IfClause', condition: condition, body: body };
  },
  elseifClause: function (condition, body) {
    return { type: 'ElseifClause', condition: condition, body: body };
  },
  elseClause: function (body) {
    return { type: 'ElseClause', body: body };
  },
  whileStatement: function (condition, body) {
    return { type: 'WhileStatement', condition: condition, body: body };
  },
  doStatement: function (body) {
    return { type: 'DoStatement', body: body };
  },
  repeatStatement: function (condition, body) {
    return { type: 'RepeatStatement', condition: condition, body: body };
  },
  localStatement: function (variables, init) {
    return { type: 'LocalStatement', variables: variables, init: init };
  },
  assignmentStatement: function (variables, init) {
    return { type: 'AssignmentStatement', variables: variables, init: init };
  },
  callStatement: function (expression) {
    return { type: 'CallStatement', expression: expression };
  },
  // Note the parameter order (parameters before isLocal) differs from the
  // property order of the node (isLocal before parameters).
  functionStatement: function (identifier, parameters, isLocal, body) {
    return {
      type: 'FunctionDeclaration',
      identifier: identifier,
      isLocal: isLocal,
      parameters: parameters,
      body: body
    };
  },
  forNumericStatement: function (variable, start, end, step, body) {
    return {
      type: 'ForNumericStatement',
      variable: variable,
      start: start,
      end: end,
      step: step,
      body: body
    };
  },
  forGenericStatement: function (variables, iterators, body) {
    return {
      type: 'ForGenericStatement',
      variables: variables,
      iterators: iterators,
      body: body
    };
  },
  chunk: function (body) {
    return { type: 'Chunk', body: body };
  },
  identifier: function (name) {
    return { type: 'Identifier', name: name };
  },
  // `type` is one of the token-type flags; anything that is not a string,
  // number, boolean or nil literal is reported as a 'VarargLiteral'.
  literal: function (type, value, raw) {
    type = (type === StringLiteral) ? 'StringLiteral'
      : (type === NumericLiteral) ? 'NumericLiteral'
      : (type === BooleanLiteral) ? 'BooleanLiteral'
      : (type === NilLiteral) ? 'NilLiteral'
      : 'VarargLiteral';

    return { type: type, value: value, raw: raw };
  },
  tableKey: function (key, value) {
    return { type: 'TableKey', key: key, value: value };
  },
  tableKeyString: function (key, value) {
    return { type: 'TableKeyString', key: key, value: value };
  },
  tableValue: function (value) {
    return { type: 'TableValue', value: value };
  },
  tableConstructorExpression: function (fields) {
    return { type: 'TableConstructorExpression', fields: fields };
  },
  // `and`/`or` are recognized by their spelling in the language being
  // parsed (LKW), not by the English words.
  binaryExpression: function (operator, left, right) {
    var type = (LKW['and'] === operator || LKW['or'] === operator) ?
      'LogicalExpression' :
      'BinaryExpression';

    return { type: type, operator: operator, left: left, right: right };
  },
  unaryExpression: function (operator, argument) {
    return { type: 'UnaryExpression', operator: operator, argument: argument };
  },
  memberExpression: function (base, indexer, identifier) {
    return {
      type: 'MemberExpression',
      indexer: indexer,
      identifier: identifier,
      base: base
    };
  },
  indexExpression: function (base, index) {
    return { type: 'IndexExpression', base: base, index: index };
  },
  callExpression: function (base, args) {
    return { type: 'CallExpression', base: base, 'arguments': args };
  },
  tableCallExpression: function (base, args) {
    return { type: 'TableCallExpression', base: base, 'arguments': args };
  },
  stringCallExpression: function (base, argument) {
    return { type: 'StringCallExpression', base: base, argument: argument };
  },
  comment: function (value, raw) {
    return { type: 'Comment', value: value, raw: raw };
  }
};
// Wrap up the node object: attach location data when location tracking is
// on, notify the `onCreateNode` callback if one is configured, and return
// the same node.
function finishNode(node) {
  // Pop a `Marker` off the location-array and attach its location data.
  if (trackLocations) {
    var location = locations.pop();
    location.complete();
    location.bless(node);
  }
  if (options.onCreateNode) options.onCreateNode(node);
  return node;
}
// Helpers
// -------

// Cached references to built-in methods, plus a strict-equality array search
// that returns the index of the first match or -1 when there is none.
var slice = Array.prototype.slice
  , toString = Object.prototype.toString
  , indexOf = function indexOf(array, element) {
    for (var i = 0, length = array.length; i < length; ++i) {
      if (array[i] === element) return i;
    }
    return -1;
  };
// Iterate through an array of objects and return the index of the first
// object whose `property` is strictly equal to `element`, or -1 if no object
// matches.
function indexOfObject(array, property, element) {
  for (var i = 0, length = array.length; i < length; ++i) {
    if (array[i][property] === element) return i;
  }
  return -1;
}
// A sprintf implementation using %index (beginning at 1) to input
// arguments in the format string. Only single-digit indexes are recognized.
// A placeholder without a matching argument is replaced by the empty string.
//
// Example:
//
//     // Unexpected function in token
//     sprintf('Unexpected %2 in %1.', 'token', 'function');
function sprintf(format) {
  var args = Array.prototype.slice.call(arguments, 1);

  return format.replace(/%(\d)/g, function (match, index) {
    var arg = args[index - 1];
    // Test for a missing argument explicitly: `'' + arg || ''` would
    // stringify undefined to the truthy text "undefined" first.
    return 'undefined' === typeof arg ? '' : '' + arg;
  });
}
// Returns a new object with the own enumerable properties from all objects
// passed as arguments. Last argument takes precedence. The arguments
// themselves are not modified.
//
// Example:
//
//     this.options = extend(options, { output: false });
function extend() {
  var hasOwn = Object.prototype.hasOwnProperty
    , dest = {}
    , src, prop;

  for (var i = 0, length = arguments.length; i < length; ++i) {
    src = arguments[i];
    for (prop in src) {
      // Borrow hasOwnProperty from Object.prototype so that sources without
      // a prototype, or with a key named `hasOwnProperty`, still work.
      /* istanbul ignore else */
      if (hasOwn.call(src, prop)) {
        dest[prop] = src[prop];
      }
    }
  }
  return dest;
}
// ### Error functions

// Returns an object that inherits from the error `e` and carries its own
// writable `line`, `index` and `column` properties, initialized from `e`.
// Where `Object.create` is unavailable, `e` itself is returned.
//
// XXX: Eliminate this function and change the error type to be different from SyntaxError.
// This will unfortunately be a breaking change, because some downstream users depend
// on the error thrown being an instance of SyntaxError. For example, the Ace editor:
// <https://github.com/ajaxorg/ace/blob/4c7e5eb3f5d5ca9434847be51834a4e41661b852/lib/ace/mode/lua_worker.js#L55>
function fixupError(e) {
  /* istanbul ignore if */
  if (!Object.create)
    return e;
  return Object.create(e, {
    'line': { 'writable': true, value: e.line },
    'index': { 'writable': true, value: e.index },
    'column': { 'writable': true, value: e.column }
  });
}
// #### Raise an exception.
//
// Raise an exception by passing a token, a string format and its parameters.
//
// The passed token's location will automatically be added to the error
// message if it exists, if not it will default to the lexer's current
// position. Pass `{}` as the token to force the lexer position.
//
// The thrown object is a SyntaxError (wrapped by fixupError) with `line`,
// `index` and `column` set. Note that the column is `range[0] - lineStart`
// when taken from a token, but `index - lineStart + 1` when taken from the
// lexer position.
//
// Example:
//
//     // [1:0] expected [ near (
//     raise(token, "expected %1 near %2", '[', token.value);
function raise(token) {
  var message = sprintf.apply(null, Array.prototype.slice.call(arguments, 1))
    , error, col;

  if ('undefined' !== typeof token.line) {
    col = token.range[0] - token.lineStart;
    error = fixupError(new SyntaxError(sprintf('[%1:%2] %3', token.line, col, message)));
    error.line = token.line;
    error.index = token.range[0];
    error.column = col;
  } else {
    col = index - lineStart + 1;
    error = fixupError(new SyntaxError(sprintf('[%1:%2] %3', line, col, message)));
    error.index = index;
    error.line = line;
    error.column = col;
  }
  throw error;
}
// #### Raise an unexpected token error.
//
// Reports that `type` (a description of what was expected, such as
// '<name>') was expected near `token`, at the token's position.
//
// Example:
//
//     // expected <name> near '0'
//     raiseUnexpectedToken('<name>', token);
function raiseUnexpectedToken(type, token) {
  raise(token, errors.expectedToken, type, token.value);
}
// #### Raise a general unexpected error
//
// Usage should pass either a token object or a symbol string which was
// found. The "near" part of the message is the value of the current
// lookahead token. When there is no lookahead token (for example when lex()
// is driven directly, without the parser), the offending token or symbol
// itself is used instead.
//
// Example:
//
//     // Unexpected symbol 'end' near '<eof>'
//     unexpected(token);
//
// If there's no token in the buffer it means we have reached <eof>.
function unexpected(found) {
  var isToken = 'undefined' !== typeof found.type
    , near = lookahead ? lookahead.value
      : isToken ? found.value
      : found;

  if (!isToken) return raise(found, errors.unexpected, 'symbol', found, near);

  // Token types without a dedicated label are reported as a plain symbol
  // rather than leaking "undefined" into the message.
  var type = 'symbol';
  switch (found.type) {
    case StringLiteral: type = 'string'; break;
    case Keyword: type = 'keyword'; break;
    case Identifier: type = 'identifier'; break;
    case NumericLiteral: type = 'number'; break;
    case Punctuator: type = 'symbol'; break;
    case BooleanLiteral: type = 'boolean'; break;
    case NilLiteral:
      // The token value is null, so name the keyword explicitly, using its
      // spelling in the language being parsed.
      return raise(found, errors.unexpected, 'symbol', LKW['nil'], near);
  }
  return raise(found, errors.unexpected, type, found.value, near);
}
// Lexer | |
// ----- | |
// | |
// The lexer, or the tokenizer reads the input string character by character | |
// and derives a token left-right. To be as efficient as possible the lexer | |
// prioritizes the common cases such as identifiers. It also works with | |
// character codes instead of characters as string comparisons was the | |
// biggest bottleneck of the parser. | |
// | |
// If `options.comments` is enabled, all comments encountered will be stored | |
// in an array which later will be appended to the chunk object. If disabled, | |
// they will simply be disregarded. | |
// | |
// When the lexer has derived a valid token, it will be returned as an object | |
// containing its value and as well as its position in the input string (this | |
// is always enabled to provide proper debug messages). | |
// | |
// `lex()` starts lexing and returns the following token in the stream. | |
var index | |
, token | |
, previousToken | |
, lookahead | |
, comments | |
, tokenStart | |
, line | |
, lineStart; | |
exports.lex = lex; | |
// Returns the next token of the input, skipping whitespace and `--` comments
// first. At the end of the input an EOF token is returned. The token kind is
// chosen from the first one to three characters and the matching scan*
// function does the actual reading; a character that cannot start any token
// is reported through unexpected().
function lex() {
  skipWhiteSpace();

  // Skip comments beginning with --
  while (45 === input.charCodeAt(index) &&
         45 === input.charCodeAt(index + 1)) {
    scanComment();
    skipWhiteSpace();
  }

  if (index >= length) {
    return {
      type: EOF,
      value: '<eof>',
      line: line,
      lineStart: lineStart,
      range: [index, index]
    };
  }

  var charCode = input.charCodeAt(index)
    , next = input.charCodeAt(index + 1);

  // Memorize the range index where the token begins.
  tokenStart = index;
  if (isIdentifierStart(charCode)) return scanIdentifierOrKeyword();

  switch (charCode) {
    case 39: case 34: // '"
      return scanStringLiteral();

    case 48: case 49: case 50: case 51: case 52: case 53:
    case 54: case 55: case 56: case 57: // 0-9
      return scanNumericLiteral();

    case 46: // .
      // If the dot is followed by a digit it's a float.
      if (isDecDigit(next)) return scanNumericLiteral();
      if (46 === next) {
        if (46 === input.charCodeAt(index + 2)) return scanVarargLiteral();
        return scanPunctuator('..');
      }
      return scanPunctuator('.');

    case 61: // =
      if (61 === next) return scanPunctuator('==');
      return scanPunctuator('=');

    case 62: // >
      if (features.bitwiseOperators) {
        if (62 === next) return scanPunctuator('>>');
      }
      if (61 === next) return scanPunctuator('>=');
      return scanPunctuator('>');

    case 60: // <
      if (features.bitwiseOperators) {
        if (60 === next) return scanPunctuator('<<');
      }
      if (61 === next) return scanPunctuator('<=');
      return scanPunctuator('<');

    case 126: // ~
      if (61 === next) return scanPunctuator('~=');
      // A lone ~ is only an operator when bitwise operators are enabled.
      if (!features.bitwiseOperators) break;
      return scanPunctuator('~');

    case 58: // :
      if (features.labels) {
        if (58 === next) return scanPunctuator('::');
      }
      return scanPunctuator(':');

    case 91: // [
      // Check for a multiline string, they begin with [= or [[
      if (91 === next || 61 === next) return scanLongStringLiteral();
      return scanPunctuator('[');

    case 47: // /
      // Check for integer division op (//)
      if (features.integerDivision) {
        if (47 === next) return scanPunctuator('//');
      }
      return scanPunctuator('/');

    case 38: case 124: // & |
      if (!features.bitwiseOperators) break;
      /* fall through */
    case 42: case 94: case 37: case 44: case 123: case 125:
    case 93: case 40: case 41: case 59: case 35: case 45:
    case 43: // * ^ % , { } ] ( ) ; # - +
      return scanPunctuator(input.charAt(index));
  }

  return unexpected(input.charAt(index));
}
// Whitespace has no semantic meaning in Lua so simply skip ahead while
// tracking the encountered newlines. Any kind of EOL sequence is counted as a
// single line.
//
// If the character at the current position is a line terminator, consumes
// it (together with the second character of a \n\r or \r\n pair), bumps the
// line counter, records where the new line starts and returns true.
// Otherwise nothing is consumed and false is returned.
function consumeEOL() {
  var charCode = input.charCodeAt(index)
    , peekCharCode = input.charCodeAt(index + 1);

  if (isLineTerminator(charCode)) {
    // Count \n\r and \r\n as one newline.
    if (10 === charCode && 13 === peekCharCode) ++index;
    if (13 === charCode && 10 === peekCharCode) ++index;
    ++line;
    lineStart = ++index;

    return true;
  }
  return false;
}
// Advances the lexer position past any run of whitespace characters and line
// terminators, stopping at the first other character or at the end of input.
// Line terminators go through consumeEOL() so line tracking stays correct.
function skipWhiteSpace() {
  while (index < length) {
    var charCode = input.charCodeAt(index);
    if (isWhiteSpace(charCode)) {
      ++index;
    } else if (!consumeEOL()) {
      break;
    }
  }
}
// Encodes a code point as UTF-8 and returns the result as a string in which
// every character holds one byte value (1 to 4 characters). Returns null for
// values of 0x110000 and above, which cannot be encoded. Surrogate values
// are encoded like any other three-byte code point.
function encodeUTF8(codepoint) {
  if (codepoint < 0x80) {
    return String.fromCharCode(codepoint);
  } else if (codepoint < 0x800) {
    return String.fromCharCode(
      0xc0 |  (codepoint >>  6)        ,
      0x80 | ( codepoint        & 0x3f)
    );
  } else if (codepoint < 0x10000) {
    return String.fromCharCode(
      0xe0 |  (codepoint >> 12)        ,
      0x80 | ((codepoint >>  6) & 0x3f),
      0x80 | ( codepoint        & 0x3f)
    );
  } else if (codepoint < 0x110000) {
    return String.fromCharCode(
      0xf0 |  (codepoint >> 18)        ,
      0x80 | ((codepoint >> 12) & 0x3f),
      0x80 | ((codepoint >>  6) & 0x3f),
      0x80 | ( codepoint        & 0x3f)
    );
  } else {
    return null;
  }
}
// This function takes a JavaScript string, encodes it in WTF-8 and
// reinterprets the resulting code units as code points; i.e. it encodes
// the string in what was the original meaning of WTF-8.
//
// ASCII characters pass through unchanged. A valid surrogate pair is encoded
// as the single code point it stands for; any other non-ASCII code unit
// (including a lone surrogate) is encoded on its own.
//
// For a detailed rationale, see the README.md file, section
// "Note on character encodings".
function fixupHighCharacters(s) {
  return s.replace(/[\ud800-\udbff][\udc00-\udfff]|[^\x00-\x7f]/g, function (m) {
    if (m.length === 1)
      return encodeUTF8(m.charCodeAt(0));
    // Combine the two surrogates into the code point they represent.
    return encodeUTF8(0x10000 + (((m.charCodeAt(0) & 0x3ff) << 10) | (m.charCodeAt(1) & 0x3ff)));
  });
}
// Identifiers, keywords, booleans and nil all look the same syntax wise. We
// simply go through them one by one, defaulting to an identifier if no
// previous case matched.
//
// The word is classified by its raw source text, because the keyword table
// (LKW) holds ordinary strings: comparing the re-encoded text produced by
// fixupHighCharacters() would never match a keyword containing non-ASCII
// characters. Keyword tokens keep the raw text as their value so that later
// comparisons against LKW entries (as in ast.binaryExpression) hold as well.
// Booleans are cast to true/false, nil to null, and identifiers carry the
// re-encoded text.
// NOTE(review): isKeyword() is defined outside the part of the file shown
// here; it is assumed to compare against LKW entries too - confirm.
function scanIdentifierOrKeyword() {
  var raw, value, type;

  // Slicing the input string is preferred before string concatenation in a
  // loop for performance reasons.
  while (isIdentifierPart(input.charCodeAt(++index)));
  raw = input.slice(tokenStart, index);

  // Decide on the token type and possibly cast the value.
  if (isKeyword(raw)) {
    type = Keyword;
    value = raw;
  } else if (LKW['true'] === raw || LKW['false'] === raw) {
    type = BooleanLiteral;
    value = (LKW['true'] === raw);
  } else if (LKW['nil'] === raw) {
    type = NilLiteral;
    value = null;
  } else {
    type = Identifier;
    value = fixupHighCharacters(raw);
  }

  return {
    type: type,
    value: value,
    line: line,
    lineStart: lineStart,
    range: [tokenStart, index]
  };
}
// Once a punctuator reaches this function it should already have been
// validated so we simply return it as a token, advancing the lexer position
// past it.
function scanPunctuator(value) {
  index += value.length;
  return {
    type: Punctuator,
    value: value,
    line: line,
    lineStart: lineStart,
    range: [tokenStart, index]
  };
}
// A vararg literal consists of three dots. The caller has already checked
// for them, so the lexer position is simply advanced by three.
function scanVarargLiteral() {
  index += 3;
  return {
    type: VarargLiteral,
    value: '...',
    line: line,
    lineStart: lineStart,
    range: [tokenStart, index]
  };
}
// Find the string literal by matching the delimiter marks used.
//
// Reads a quoted string starting at the current position (which must be the
// opening ' or ") and returns a StringLiteral token whose value has escape
// sequences translated. Raises `unfinishedString` if a line terminator or
// the end of the input is reached before the closing delimiter - including
// when the input ends directly after the opening delimiter or directly
// after an escape sequence.
function scanStringLiteral() {
  var delimiter = input.charCodeAt(index++)
    , beginLine = line
    , beginLineStart = lineStart
    , stringStart = index
    , string = ''
    , closed = false
    , charCode;

  while (index < length) {
    charCode = input.charCodeAt(index++);
    if (delimiter === charCode) {
      closed = true;
      break;
    }
    if (92 === charCode) { // backslash
      // Flush the plain text before the backslash, then the decoded escape.
      string += fixupHighCharacters(input.slice(stringStart, index - 1)) + readEscapeSequence();
      stringStart = index;
    }
    // EOF or `\n` terminates a string literal. If we haven't found the
    // ending delimiter by now, raise an exception.
    else if (index >= length || isLineTerminator(charCode)) {
      string += input.slice(stringStart, index - 1);
      raise({}, errors.unfinishedString, string + String.fromCharCode(charCode));
    }
  }

  // The loop can also run out of input without seeing the delimiter: right
  // after the opening quote, or right after an escape sequence.
  if (!closed) raise({}, errors.unfinishedString, string);

  string += fixupHighCharacters(input.slice(stringStart, index - 1));

  return {
    type: StringLiteral,
    value: string,
    line: beginLine,
    lineStart: beginLineStart,
    lastLine: line,
    lastLineStart: lineStart,
    range: [tokenStart, index]
  };
}
// Expect a multiline string literal and return it as a regular string
// literal, if it doesn't validate into a valid multiline string, throw an
// exception.
//
// The error is reported at the most recently consumed token when there is
// one. When no token has been consumed yet (for example when lex() is
// driven directly), it is reported at the lexer position instead, quoting
// the first two characters of the attempted delimiter.
function scanLongStringLiteral() {
  var beginLine = line
    , beginLineStart = lineStart
    , string = readLongString(false);

  // Fail if it's not a multiline literal.
  if (false === string) {
    if (token) raise(token, errors.expected, '[', token.value);
    raise({}, errors.expected, '[', input.slice(tokenStart, tokenStart + 2));
  }

  return {
    type: StringLiteral,
    value: fixupHighCharacters(string),
    line: beginLine,
    lineStart: beginLineStart,
    lastLine: line,
    lastLineStart: lineStart,
    range: [tokenStart, index]
  };
}
// Numeric literals will be returned as floating-point numbers instead of
// strings. The raw value should be retrieved from slicing the input string
// later on in the process.
//
// If a hexadecimal number is encountered (a `0` followed by `x` or `X`), it
// will be converted.
function scanNumericLiteral() {
  var character = input.charAt(index)
    , next = input.charAt(index + 1)
    , isHex = '0' === character && ('x' === next || 'X' === next);

  var value = isHex ? readHexLiteral() : readDecLiteral();

  return {
    type: NumericLiteral,
    value: value,
    line: line,
    lineStart: lineStart,
    range: [tokenStart, index]
  };
}
// Lua hexadecimals have an optional fraction part and an optional binary | |
// exoponent part. These are not included in JavaScript so we will compute | |
// all three parts separately and then sum them up at the end of the function | |
// with the following algorithm. | |
// | |
// Digit := toDec(digit) | |
// Fraction := toDec(fraction) / 16 ^ fractionCount | |
// BinaryExp := 2 ^ binaryExp | |
// Number := ( Digit + Fraction ) * BinaryExp | |
function readHexLiteral() { | |
var fraction = 0 // defaults to 0 as it gets summed | |
, binaryExponent = 1 // defaults to 1 as it gets multiplied | |
, binarySign = 1 // positive | |
, digit, fractionStart, exponentStart, digitStart; | |
digitStart = index += 2; // Skip 0x part | |
// A minimum of one hex digit is required. | |
if (!isHexDigit(input.charCodeAt(index))) | |
raise({}, errors.malformedNumber, input.slice(tokenStart, index)); | |
while (isHexDigit(input.charCodeAt(index))) ++index; | |
// Convert the hexadecimal digit to base 10. | |
digit = parseInt(input.slice(digitStart, index), 16); | |
// Fraction part i optional. | |
if ('.' === input.charAt(index)) { | |
fractionStart = ++index; | |
while (isHexDigit(input.charCodeAt(index))) ++index; | |
fraction = input.slice(fractionStart, index); | |
// Empty fraction parts should default to 0, others should be converted | |
// 0.x form so we can use summation at the end. | |
fraction = (fractionStart === index) ? 0 | |
: parseInt(fraction, 16) / Math.pow(16, index - fractionStart); | |
} | |
// Binary exponents are optional | |
if ('pP'.indexOf(input.charAt(index) || null) >= 0) { | |
++index; | |
// Sign part is optional and defaults to 1 (positive). | |
if ('+-'.indexOf(input.charAt(index) || null) >= 0) | |
binarySign = ('+' === input.charAt(index++)) ? 1 : -1; | |
exponentStart = index; | |
// The binary exponent sign requires a decimal digit. | |
if (!isDecDigit(input.charCodeAt(index))) | |
raise({}, errors.malformedNumber, input.slice(tokenStart, index)); | |
while (isDecDigit(input.charCodeAt(index))) ++index; | |
binaryExponent = input.slice(exponentStart, index); | |
// Calculate the binary exponent of the number. | |
binaryExponent = Math.pow(2, binaryExponent * binarySign); | |
} | |
return (digit + fraction) * binaryExponent; | |
} | |
// Decimal numbers are exactly the same in Lua and in JavaScript, because of
// this we check where the token ends and then parse it with native
// functions.
//
// Reads digits, an optional fraction and an optional exponent starting at
// the token start, and returns the parsed number. Raises `malformedNumber`
// when an exponent marker is not followed by at least one digit.
function readDecLiteral() {
  var marker;

  while (isDecDigit(input.charCodeAt(index))) ++index;

  // Fraction part is optional
  if ('.' === input.charAt(index)) {
    ++index;
    // Fraction part defaults to 0
    while (isDecDigit(input.charCodeAt(index))) ++index;
  }

  // Exponent part is optional.
  marker = input.charAt(index);
  if ('e' === marker || 'E' === marker) {
    ++index;

    // Sign part is optional.
    marker = input.charAt(index);
    if ('+' === marker || '-' === marker) ++index;

    // An exponent is required to contain at least one decimal digit.
    if (!isDecDigit(input.charCodeAt(index)))
      raise({}, errors.malformedNumber, input.slice(tokenStart, index));

    while (isDecDigit(input.charCodeAt(index))) ++index;
  }

  return parseFloat(input.slice(tokenStart, index));
}
// Reads a `u{XXX}` escape (hexadecimal code point in braces) and returns the
// code point encoded by encodeUTF8(). The lexer position is presumably on the
// `u` when this is called: one character is skipped before `{` is expected.
//
// Raises `braceExpected` for a missing `{` or `}`, `hexadecimalDigitExpected`
// for anything else that is not a hex digit, and `tooLargeCodepoint` when
// there are more than six significant digits or the value cannot be encoded.
function readUnicodeEscapeSequence() {
  var sequenceStart = index++;

  if (input.charAt(index++) !== '{')
    raise({}, errors.braceExpected, '{', '\\' + input.slice(sequenceStart, index));
  if (!isHexDigit(input.charCodeAt(index)))
    raise({}, errors.hexadecimalDigitExpected, '\\' + input.slice(sequenceStart, index));

  // Skip leading zeros so that only significant digits count towards the
  // length limit below.
  while (input.charCodeAt(index) === 0x30) ++index;
  var escStart = index;

  while (isHexDigit(input.charCodeAt(index))) {
    ++index;
    if (index - escStart > 6)
      raise({}, errors.tooLargeCodepoint, '\\' + input.slice(sequenceStart, index));
  }

  var b = input.charAt(index++);
  if (b !== '}') {
    if ((b === '"') || (b === "'"))
      raise({}, errors.braceExpected, '}', '\\' + input.slice(sequenceStart, index--));
    else
      raise({}, errors.hexadecimalDigitExpected, '\\' + input.slice(sequenceStart, index));
  }

  // An escape made only of zeros (such as \u{0}) has no significant digits
  // left after the stripping above; it denotes code point 0.
  var codepoint = parseInt(input.slice(escStart, index - 1) || '0', 16);

  codepoint = encodeUTF8(codepoint);
  if (codepoint === null) {
    raise({}, errors.tooLargeCodepoint, '\\' + input.slice(sequenceStart, index));
  }
  return codepoint;
}
// Translate the escape sequence whose first character (the one right after
// the backslash) is at `index` into the text it stands for, advancing `index`
// past the sequence.
//
// Sequences that are not recognised, or whose feature flag is off, are
// reported through `raise()` as `errors.invalidEscape` when
// `features.strictEscapes` is set; otherwise the escaped character itself is
// returned.
function readEscapeSequence() {
  var escapeStart = index;
  var ch = input.charAt(index);
  // Single-letter escapes that map directly onto one character.
  var simple = {
    a: '\x07', n: '\n', r: '\r', t: '\t', v: '\x0b', b: '\b', f: '\f'
  };
  var code;
  var fallback;

  if (Object.prototype.hasOwnProperty.call(simple, ch)) {
    index += 1;
    return simple[ch];
  }

  // Backslash at the end of a line: every kind of line ending counts as a
  // single LF character.
  if (ch === '\r' || ch === '\n') {
    consumeEOL();
    return '\n';
  }

  // \ddd, where ddd is a sequence of up to three decimal digits.
  if (ch >= '0' && ch <= '9') {
    while (isDecDigit(input.charCodeAt(index)) && index - escapeStart < 3) index += 1;
    code = parseInt(input.slice(escapeStart, index), 10);
    if (code > 255) {
      raise({}, errors.decimalEscapeTooLarge, '\\' + code);
    }
    return String.fromCharCode(code);
  }

  if (ch === 'z') {
    // \z swallows the whitespace that follows it.
    if (features.skipWhitespaceEscape) {
      index += 1;
      skipWhiteSpace();
      return '';
    }
  } else if (ch === 'x') {
    if (features.hexEscapes) {
      // \xXX, where XX is a sequence of exactly two hexadecimal digits.
      if (isHexDigit(input.charCodeAt(index + 1)) &&
          isHexDigit(input.charCodeAt(index + 2))) {
        index += 3;
        return String.fromCharCode(parseInt(input.slice(escapeStart + 1, index), 16));
      }
      raise({}, errors.hexadecimalDigitExpected, '\\' + input.slice(escapeStart, index + 2));
    }
  } else if (ch === 'u') {
    if (features.unicodeEscapes) return readUnicodeEscapeSequence();
  } else if (ch === '\\' || ch === '"' || ch === "'") {
    index += 1;
    return ch;
  }

  if (features.strictEscapes) {
    raise({}, errors.invalidEscape, '\\' + input.slice(escapeStart, index + 1));
  }
  fallback = input.charAt(index);
  index += 1;
  return fallback;
}
// Scan the comment that starts with the `--` at `index`.
//
// A `[` right after the dashes is tried as a long (multiline) comment with
// `readLongString()`, the same routine used for long string literals. If that
// reports `false`, or there was no `[`, the comment runs to the end of the
// line.
//
// Nothing is returned. When `options.comments` is set a comment node is
// appended to `comments`; otherwise the text is only skipped.
function scanComment() {
  tokenStart = index;
  index += 2; // --

  var opener = input.charAt(index);
  var bodyStart = index;
  // Captured before scanning: a long comment may move `line`/`lineStart`.
  var startLine = line;
  var startLineOffset = lineStart;
  var text = '';
  var multiline = false;
  var longText;
  var node;

  if (opener === '[') {
    longText = readLongString(true);
    multiline = longText !== false;
    // Not a multiline comment after all: fall back to the single character.
    text = multiline ? longText : opener;
  }

  // Scan until the next line as long as it's not a multiline comment.
  if (!multiline) {
    while (index < length && !isLineTerminator(input.charCodeAt(index))) index += 1;
    if (options.comments) text = input.slice(bodyStart, index);
  }

  if (!options.comments) return;

  node = ast.comment(text, input.slice(tokenStart, index));

  // `Marker`s depend on tokens available in the parser and as comments are
  // intercepted in the lexer all location data is set manually.
  if (options.locations) {
    node.loc = {
      start: { line: startLine, column: tokenStart - startLineOffset },
      end: { line: line, column: index - lineStart }
    };
  }
  if (options.ranges) {
    node.range = [tokenStart, index];
  }
  if (options.onCreateNode) options.onCreateNode(node);
  comments.push(node);
}
// Read a long bracket string such as `[[...]]` or `[==[...]==]`, starting at
// the first `[` under `index`.
//
// Returns the text between the brackets, or `false` (after stepping over the
// first `[`) when the opening bracket is not completed by a second `[`. A
// line break directly after the opening bracket is not part of the text.
// If the input ends before a closing bracket of the same level is found, this
// is reported through `raise()` as an unfinished long comment when `isComment`
// is truthy and as an unfinished long string otherwise.
function readLongString(isComment) {
  var depth = 0;
  var openingLine = line;
  var bodyStart;
  var closes;
  var offset;
  var text;

  index += 1; // [

  // The number of `=` characters is the level of the bracket.
  while (input.charAt(index + depth) === '=') depth += 1;
  // Exit, this is not a long string after all.
  if (input.charAt(index + depth) !== '[') return false;

  index += depth + 1;

  // If the first character is a newline, ignore it and begin on next line.
  if (isLineTerminator(input.charCodeAt(index))) consumeEOL();

  bodyStart = index;
  while (index < length) {
    // Line endings go through `consumeEOL()` so line tracking stays correct.
    while (isLineTerminator(input.charCodeAt(index))) consumeEOL();

    if (input.charAt(index++) !== ']') continue;

    // A `]` only terminates when followed by `depth` equals signs and a
    // second `]`.
    closes = input.charAt(index + depth) === ']';
    for (offset = 0; closes && offset < depth; offset += 1) {
      closes = input.charAt(index + offset) === '=';
    }

    if (closes) {
      text = input.slice(bodyStart, index - 1);
      index += depth + 1;
      return text;
    }
  }

  raise({}, isComment ?
            errors.unfinishedLongComment :
            errors.unfinishedLongString,
        openingLine, '<eof>');
}
// ## Lex functions and helpers. | |
// Advance the token window by one position: the current token becomes
// `previousToken`, the lookahead becomes the current token, and a fresh
// lookahead is read with `lex()`.
function next() {
  var upcoming = lookahead;
  previousToken = token;
  token = upcoming;
  lookahead = lex();
}
// Consume the current token if its value matches `value`. Returns `true`
// when the token was consumed and `false` otherwise.
//
// String literal tokens never match: their value is the string's content,
// so a literal such as ")" or ";" would otherwise be mistaken for the
// punctuator or keyword spelled the same way.
function consume(value) {
  if (StringLiteral !== token.type && value === token.value) {
    next();
    return true;
  }
  return false;
}
// Require the current token to have the given value and step past it. A
// mismatch is reported through `raise()` as `errors.expected`.
//
// String literal tokens never match: their value is the string's content,
// so a literal such as ")" or "end" would otherwise satisfy the expectation
// for the punctuator or keyword spelled the same way.
function expect(value) {
  if (StringLiteral !== token.type && value === token.value) next();
  else raise(token, errors.expected, value, token.value);
}
// ### Validation functions | |
// Whether the character code is a horizontal tab, space, vertical tab or
// form feed. Line terminators are handled by `isLineTerminator()`.
function isWhiteSpace(charCode) {
  switch (charCode) {
    case 0x09: // tab
    case 0x20: // space
    case 0x0B: // vertical tab
    case 0x0C: // form feed
      return true;
    default:
      return false;
  }
}
// Whether the character code is a line feed (LF) or a carriage return (CR).
function isLineTerminator(charCode) {
  return charCode === 0x0A || charCode === 0x0D;
}
// Whether the character code is an ASCII decimal digit, `0` through `9`.
function isDecDigit(charCode) {
  return 0x30 <= charCode && charCode <= 0x39;
}
// Whether the character code is an ASCII hexadecimal digit in either case.
function isHexDigit(charCode) {
  if (charCode >= 48 && charCode <= 57) return true;   // 0-9
  if (charCode >= 97 && charCode <= 102) return true;  // a-f
  return charCode >= 65 && charCode <= 70;             // A-F
}
// Whether the character code may begin an identifier: an ASCII letter or an
// underscore.
//
// From [Lua 5.2](http://www.lua.org/manual/5.2/manual.html#8.1) onwards
// identifiers cannot use 'locale-dependent' letters (i.e. dependent on the C
// locale). LuaJIT, on the other hand, allows arbitrary octets >= 128 in
// identifiers, which is what `options.extendedIdentifiers` enables here.
function isIdentifierStart(charCode) {
  var isUpper = charCode >= 65 && charCode <= 90;
  var isLower = charCode >= 97 && charCode <= 122;

  if (isUpper || isLower || charCode === 95) return true;
  return Boolean(options.extendedIdentifiers && charCode >= 128);
}
// Whether the character code may appear after the first character of an
// identifier: an ASCII letter, a decimal digit or an underscore. With
// `options.extendedIdentifiers` set, any code of 128 or above is accepted
// as well.
function isIdentifierPart(charCode) {
  var isUpper = charCode >= 65 && charCode <= 90;
  var isLower = charCode >= 97 && charCode <= 122;
  var isDigit = charCode >= 48 && charCode <= 57;

  if (isUpper || isLower || charCode === 95 || isDigit) return true;
  return Boolean(options.extendedIdentifiers && charCode >= 128);
}
// [3.1 Lexical Conventions](http://www.lua.org/manual/5.2/manual.html#3.1)
//
// Whether `id` is a keyword in the language described by `LKW`. `true`,
// `false` and `nil` are not considered keywords, but literals. `goto` is
// matched in its English spelling, and only when labels are enabled and
// `goto` is not contextual.
function isKeyword(id) {
  var names = [
    'do', 'if', 'in', 'or',
    'and', 'end', 'for', 'not',
    'else', 'then',
    'break', 'local', 'until', 'while',
    'elseif', 'repeat', 'return',
    'function'
  ];
  var i;

  for (i = 0; i < names.length; i += 1) {
    if (LKW[names[i]] === id) return true;
  }

  if (features.labels && !features.contextualGoto) {
    return id === 'goto';
  }
  return false;
}
// Whether the token is a unary operator: one of the punctuators `#`, `-`,
// `~`, or the `not` keyword of the language described by `LKW`.
function isUnary(token) {
  switch (token.type) {
    case Punctuator:
      return '#-~'.indexOf(token.value) !== -1;
    case Keyword:
      return token.value === LKW['not'];
    default:
      return false;
  }
}
// Whether the expression node is one of the three call forms: a call with
// parenthesised arguments, with a table argument, or with a string argument.
//
// @TODO this needs to be rethought.
function isCallExpression(expression) {
  var callTypes = ['CallExpression', 'TableCallExpression', 'StringCallExpression'];
  return callTypes.indexOf(expression.type) !== -1;
}
// Check if the token syntactically closes a block: the end of input, or one
// of the keywords `else`, `elseif`, `end` and `until` as spelled in `LKW`.
function isBlockFollow(token) {
  var value;

  if (token.type === EOF) return true;
  if (token.type !== Keyword) return false;

  value = token.value;
  return value === LKW['else'] || value === LKW['elseif'] ||
         value === LKW['end'] || value === LKW['until'];
}
// Scope | |
// ----- | |
// Each block scope is stored as an array of identifier names, and the scopes
// themselves are kept in an array used as a stack.
var scopes;
// The index of the current scope within `scopes`.
var scopeDepth;
// A list of all global identifier nodes.
var globals;
// Create a new scope inheriting all declarations from the previous scope,
// make it the current one, and notify `options.onCreateScope` if present.
function createScope() {
  var parent = scopes[scopeDepth++];
  // Copy with `slice()` rather than `Array.apply(null, parent)`: spreading
  // the parent scope as call arguments is subject to the engine's limit on
  // the number of arguments, and a parent holding a single number would be
  // taken as an array length instead of being copied.
  var scope = parent ? parent.slice() : [];
  scopes.push(scope);
  if (options.onCreateScope) options.onCreateScope();
}
// Exit and remove the current scope, then notify `options.onDestroyScope`
// if present. The removed scope itself is not needed afterwards, so it is
// simply dropped.
function destroyScope() {
  scopes.pop();
  scopeDepth--;
  if (options.onDestroyScope) options.onDestroyScope();
}
// Add an identifier name to the current scope if it doesn't already exist
// there. `options.onLocalDeclaration`, when present, is told about every
// declaration, including repeated ones.
function scopeIdentifierName(name) {
  var current;

  if (options.onLocalDeclaration) options.onLocalDeclaration(name);

  current = scopes[scopeDepth];
  if (indexOf(current, name) === -1) current.push(name);
}
// Declare an identifier node in the current scope: its name is registered
// with `scopeIdentifierName()` and the node is marked as local.
function scopeIdentifier(node) {
  var identifierName = node.name;
  scopeIdentifierName(identifierName);
  attachScope(node, true);
}
// Attach scope information to a node by setting its `isLocal` property. A
// non-local node is also stored in the `globals` array, unless a node with
// the same name is already there, so the information can be returned to the
// user.
function attachScope(node, isLocal) {
  var isNewGlobal = !isLocal && indexOfObject(globals, 'name', node.name) === -1;

  if (isNewGlobal) globals.push(node);
  node.isLocal = isLocal;
}
// Whether the identifier name is declared in the current scope.
function scopeHasName(name) {
  var position = indexOf(scopes[scopeDepth], name);
  return position !== -1;
}
// Location tracking | |
// ----------------- | |
// | |
// Locations are kept in an array used as a stack of `Marker` objects, each
// holding `loc` and/or `range` data. Once a `Marker` is popped off the list
// an end location is added and the data is attached to a syntax node.
var locations = [];
// Checked before markers are pushed onto `locations`.
var trackLocations;
// Create a `Marker` that starts at the current token.
function createLocationMarker() {
  var marker = new Marker(token);
  return marker;
}
// A `Marker` records where a syntax node starts, taken from the given token.
// Depending on `options.locations` and `options.ranges` it carries a `loc`
// (line/column) and/or a `range` (offsets); the end positions start out as
// zero and are filled in by `complete()`.
function Marker(token) {
  var startOffset = token.range[0];

  if (options.locations) {
    this.loc = {
      start: { line: token.line, column: startOffset - token.lineStart },
      end: { line: 0, column: 0 }
    };
  }
  if (options.ranges) this.range = [startOffset, 0];
}

// Complete the location data stored in the `Marker` by adding the location
// of the *previous token* as an end location. A token's `lastLine` and
// `lastLineStart`, when set, take precedence over `line` and `lineStart`.
Marker.prototype.complete = function () {
  var last = previousToken;

  if (options.locations) {
    this.loc.end.line = last.lastLine || last.line;
    this.loc.end.column = last.range[1] - (last.lastLineStart || last.lineStart);
  }
  if (options.ranges) {
    this.range[1] = last.range[1];
  }
};

// Copy this marker's location data onto a syntax node. The node receives its
// own fresh objects, so it never shares them with the marker.
Marker.prototype.bless = function (node) {
  var loc = this.loc;
  var range = this.range;

  if (loc) {
    node.loc = {
      start: { line: loc.start.line, column: loc.start.column },
      end: { line: loc.end.line, column: loc.end.column }
    };
  }
  if (range) {
    node.range = [range[0], range[1]];
  }
};
// Create a new `Marker` for the current token and push it onto `locations`.
// Does nothing unless `trackLocations` is set.
function markLocation() {
  if (!trackLocations) return;
  locations.push(createLocationMarker());
}
// Push an arbitrary, already created `Marker` object onto `locations`. Does
// nothing unless `trackLocations` is set.
function pushLocation(marker) {
  if (!trackLocations) return;
  locations.push(marker);
}
// Parse functions | |
// --------------- | |
// Parse the main program object. Syntactically a chunk is the same as a
// block, parsed inside its own scope when `options.scope` is set. Anything
// left over after the block is reported with `unexpected()`.
//
//     chunk ::= block
function parseChunk() {
  var statements;

  next();
  markLocation();

  if (options.scope) createScope();
  statements = parseBlock();
  if (options.scope) destroyScope();

  if (token.type !== EOF) unexpected(token);

  // If the body is empty no previousToken exists when finishNode runs.
  if (trackLocations && statements.length === 0) previousToken = token;

  return finishNode(ast.chunk(statements));
}
// Parse a list of statements up to, but not including, a token that closes
// a block, and return them as a plain array (a block has no AST node of its
// own). The `terminator` parameter is not used.
//
//     block ::= {stat} [retstat]
function parseBlock(terminator) {
  var statements = [];
  var isFinal;
  var node;

  while (!isBlockFollow(token)) {
    // Return has to be the last statement in a block.
    // Likewise 'break' in Lua older than 5.2
    isFinal = token.value === LKW['return'] ||
              (!features.relaxedBreak && token.value === LKW['break']);

    node = parseStatement();
    if (isFinal) {
      statements.push(node);
      break;
    }

    consume(';');
    // Statements are only added if they are returned, this allows us to
    // ignore some statements, such as EmptyStatement.
    if (node) statements.push(node);
  }

  return statements;
}
// Parse a single statement and return its node. An empty statement (`;`),
// when `features.emptyStatement` allows it, is consumed and yields
// `undefined`.
//
//     statement ::= break | goto | do | while | repeat | return
//          | if | for | function | local | label | assignment
//          | functioncall | ';'
function parseStatement() {
  markLocation();
  if (Keyword === token.type) {
    switch (token.value) {
      case LKW['local']:    next(); return parseLocalStatement();
      case LKW['if']:       next(); return parseIfStatement();
      case LKW['return']:   next(); return parseReturnStatement();
      case LKW['function']: next();
        var name = parseFunctionName();
        return parseFunctionDeclaration(name);
      case LKW['while']:    next(); return parseWhileStatement();
      case LKW['for']:      next(); return parseForStatement();
      case LKW['repeat']:   next(); return parseRepeatStatement();
      case LKW['break']:    next(); return parseBreakStatement();
      case LKW['do']:       next(); return parseDoStatement();
      // `isKeyword()` recognises `goto` by its English spelling only, so the
      // keyword token always carries the literal value 'goto'. Comparing
      // against a table entry that does not exist would never match.
      case 'goto':          next(); return parseGotoStatement();
    }
  }

  // With contextual goto, `goto` is an ordinary identifier that only acts as
  // a statement keyword when another identifier (the label) follows it.
  if (features.contextualGoto &&
      token.type === Identifier && token.value === 'goto' &&
      lookahead.type === Identifier && lookahead.value !== 'goto') {
    next(); return parseGotoStatement();
  }

  if (Punctuator === token.type) {
    if (consume('::')) return parseLabelStatement();
  }
  // Assignments memorizes the location and pushes it manually for wrapper
  // nodes. Additionally empty `;` statements should not mark a location.
  if (trackLocations) locations.pop();

  // When a `;` is encountered, simply eat it without storing it.
  if (features.emptyStatement) {
    if (consume(';')) return;
  }

  return parseAssignmentOrCallStatement();
}
// ## Statements | |
// Parse a label statement; the opening `::` has already been consumed. With
// `options.scope` set, the label is registered in the current scope under
// the name `::Name::` and its identifier node is marked as local.
//
//     label ::= '::' Name '::'
function parseLabelStatement() {
  var labelName = token.value;
  var identifier = parseIdentifier();

  if (options.scope) {
    scopeIdentifierName('::' + labelName + '::');
    attachScope(identifier, true);
  }

  expect('::');
  return finishNode(ast.labelStatement(identifier));
}
// Build the node for a break statement; the keyword itself has already been
// consumed by the caller.
//
//     break ::= 'break'
function parseBreakStatement() {
  var node = ast.breakStatement();
  return finishNode(node);
}
// Parse the label of a goto statement; the `goto` itself has already been
// consumed by the caller.
//
//     goto ::= 'goto' Name
function parseGotoStatement() {
  var label = parseIdentifier();
  return finishNode(ast.gotoStatement(label));
}
// Parse the remainder of a do statement, after the opening keyword. The
// block gets its own scope when `options.scope` is set.
//
//     do ::= 'do' block 'end'
function parseDoStatement() {
  var statements;

  if (options.scope) createScope();
  statements = parseBlock();
  if (options.scope) destroyScope();

  expect(LKW['end']);
  return finishNode(ast.doStatement(statements));
}
// Parse the remainder of a while statement, after the opening keyword. The
// loop body gets its own scope when `options.scope` is set; the condition is
// parsed outside of it.
//
//     while ::= 'while' exp 'do' block 'end'
function parseWhileStatement() {
  var test = parseExpectedExpression();
  var statements;

  expect(LKW['do']);

  if (options.scope) createScope();
  statements = parseBlock();
  if (options.scope) destroyScope();

  expect(LKW['end']);
  return finishNode(ast.whileStatement(test, statements));
}
// Parse the remainder of a repeat statement, after the opening keyword. The
// scope opened for the body is only closed after the `until` condition has
// been parsed, so the condition is parsed inside the body's scope.
//
//     repeat ::= 'repeat' block 'until' exp
function parseRepeatStatement() {
  var statements;
  var test;

  if (options.scope) createScope();
  statements = parseBlock();
  expect(LKW['until']);
  test = parseExpectedExpression();
  if (options.scope) destroyScope();

  return finishNode(ast.repeatStatement(test, statements));
}
// Parse the remainder of a return statement, after the opening keyword, and
// return its node with the (possibly empty) list of returned expressions.
//
//     retstat ::= 'return' [exp {',' exp}] [';']
function parseReturnStatement() {
  var expressions = [];

  // Nothing can follow `return` when the block ends right away. The type is
  // checked as well because a string literal with the same content as the
  // `end` keyword (`return "end"`) is a value to be returned.
  if (!(Keyword === token.type && LKW['end'] === token.value)) {
    var expression = parseExpression();
    // Further expressions are only allowed after a first one; a leading
    // comma (`return , x`) is left in place for the caller to reject.
    if (null != expression) {
      expressions.push(expression);
      while (consume(',')) {
        expression = parseExpectedExpression();
        expressions.push(expression);
      }
    }
    consume(';'); // grammar tells us ; is optional here.
  }
  return finishNode(ast.returnStatement(expressions));
}
// Parse the remainder of an if statement, after the opening keyword, and
// return a node holding one clause per `if` / `elseif` / `else` branch. Each
// branch body is parsed in its own scope when `options.scope` is set.
//
//     if ::= 'if' exp 'then' block {elif} ['else' block] 'end'
//     elif ::= 'elseif' exp 'then' block
function parseIfStatement() {
  var clauses = [];
  var test;
  var clauseMarker;

  // Parse the block of a single clause inside its own scope.
  function parseClauseBlock() {
    var statements;
    if (options.scope) createScope();
    statements = parseBlock();
    if (options.scope) destroyScope();
    return statements;
  }

  // IfClauses begin at the same location as the parent IfStatement.
  // It ends at the start of `end`, `else`, or `elseif`.
  if (trackLocations) {
    clauseMarker = locations[locations.length - 1];
    locations.push(clauseMarker);
  }
  test = parseExpectedExpression();
  expect(LKW['then']);
  clauses.push(finishNode(ast.ifClause(test, parseClauseBlock())));

  // The marker for a following clause is taken before its keyword is
  // consumed, so the clause's location includes the keyword.
  if (trackLocations) clauseMarker = createLocationMarker();
  while (consume(LKW['elseif'])) {
    pushLocation(clauseMarker);
    test = parseExpectedExpression();
    expect(LKW['then']);
    clauses.push(finishNode(ast.elseifClause(test, parseClauseBlock())));
    if (trackLocations) clauseMarker = createLocationMarker();
  }

  if (consume(LKW['else'])) {
    // Include the `else` in the location of ElseClause.
    if (trackLocations) {
      clauseMarker = new Marker(previousToken);
      locations.push(clauseMarker);
    }
    clauses.push(finishNode(ast.elseClause(parseClauseBlock())));
  }

  expect(LKW['end']);
  return finishNode(ast.ifStatement(clauses));
}
// Parse the remainder of a for statement, after the opening keyword. A first
// name followed by `=` makes it a numeric for statement; anything else is
// parsed as a generic one. With `options.scope` set, the loop variables and
// the body share one scope that is opened right after the first name.
//
//     for ::= Name '=' exp ',' exp [',' exp] 'do' block 'end'
//     for ::= namelist 'in' explist 'do' block 'end'
//     namelist ::= Name {',' Name}
//     explist ::= exp {',' exp}
function parseForStatement() {
  var firstName = parseIdentifier();
  var names;
  var nextName;
  var iterators;
  var startValue;
  var endValue;
  var stepValue;

  // Parse `'do' block 'end'` and close the loop scope.
  function parseLoopBody() {
    var statements;
    expect(LKW['do']);
    statements = parseBlock();
    expect(LKW['end']);
    if (options.scope) destroyScope();
    return statements;
  }

  // The start-identifier is local.
  if (options.scope) {
    createScope();
    scopeIdentifier(firstName);
  }

  // Numeric For Statement
  if (consume('=')) {
    startValue = parseExpectedExpression();
    expect(',');
    endValue = parseExpectedExpression();
    // The step expression is optional.
    stepValue = consume(',') ? parseExpectedExpression() : null;

    return finishNode(
      ast.forNumericStatement(firstName, startValue, endValue, stepValue, parseLoopBody())
    );
  }

  // Generic For Statement: the namelist can contain one or more identifiers.
  names = [firstName];
  while (consume(',')) {
    nextName = parseIdentifier();
    // Each variable in the namelist is locally scoped.
    if (options.scope) scopeIdentifier(nextName);
    names.push(nextName);
  }
  expect(LKW['in']);

  // One or more expressions in the explist.
  iterators = [];
  do {
    iterators.push(parseExpectedExpression());
  } while (consume(','));

  return finishNode(ast.forGenericStatement(names, iterators, parseLoopBody()));
}
// Parse the remainder of a local statement, after the opening keyword. It
// is either a list of variable declarations with optional initialisers, or
// a local function definition, which is delegated to
// `parseFunctionDeclaration()` with the isLocal flag. Anything else is
// reported with `raiseUnexpectedToken()`.
//
// This AST structure might change into a local assignment with a function
// child.
//
//     local ::= 'local' 'function' Name funcdecl
//        | 'local' Name {',' Name} ['=' exp {',' exp}]
function parseLocalStatement() {
  var names;
  var values;
  var functionName;
  var i;

  if (token.type !== Identifier) {
    if (!consume(LKW['function'])) {
      raiseUnexpectedToken('<name>', token);
      return;
    }

    functionName = parseIdentifier();
    // The function's own name is declared before the scope for its body is
    // opened.
    if (options.scope) {
      scopeIdentifier(functionName);
      createScope();
    }
    // MemberExpressions are not allowed in local function statements.
    return parseFunctionDeclaration(functionName, true);
  }

  names = [];
  values = [];

  do {
    names.push(parseIdentifier());
  } while (consume(','));

  if (consume('=')) {
    do {
      values.push(parseExpectedExpression());
    } while (consume(','));
  }

  // Declarations don't exist before the statement has been evaluated.
  // Therefore assignments can't use their declarator. And the identifiers
  // shouldn't be added to the scope until the statement is complete.
  if (options.scope) {
    for (i = 0; i < names.length; i += 1) {
      scopeIdentifier(names[i]);
    }
  }

  return finishNode(ast.localStatement(names, values));
}
// Check that an expression node can be assigned to: it must be an
// identifier, member expression or index expression that was not wrapped in
// parentheses. Anything else is reported through `raise()` as
// `errors.invalidVar`, at the current token.
//
// @TODO we need something not dependent on the exact AST used. see also isCallExpression()
function validateVar(node) {
  var isVarType = node.type === 'Identifier' ||
                  node.type === 'MemberExpression' ||
                  node.type === 'IndexExpression';
  var assignable = !node.inParens && isVarType;

  if (!assignable) {
    raise(token, errors.invalidVar, token.value);
  }
}
// Parse a statement that starts with a prefix expression: either an
// assignment or a call. A prefix expression that turns out to be neither is
// reported with `unexpected()`.
//
//     assignment ::= varlist '=' explist
//     var ::= Name | prefixexp '[' exp ']' | prefixexp '.' Name
//     varlist ::= var {',' var}
//     explist ::= exp {',' exp}
//
//     call ::= callexp
//     callexp ::= prefixexp args | prefixexp ':' Name args
function parseAssignmentOrCallStatement() {
  // Keep a reference to the first token for better error messages in case
  // of an invalid statement.
  var firstToken = token;
  var startMarker;
  var target;
  var targets;
  var values;
  var candidate;

  if (trackLocations) startMarker = createLocationMarker();
  target = parsePrefixExpression();

  if (target == null) return unexpected(token);

  if (',='.indexOf(token.value) !== -1) {
    validateVar(target);
    targets = [target];
    values = [];

    while (consume(',')) {
      candidate = parsePrefixExpression();
      if (candidate == null) raiseUnexpectedToken('<expression>', token);
      validateVar(candidate);
      targets.push(candidate);
    }

    expect('=');

    do {
      values.push(parseExpectedExpression());
    } while (consume(','));

    pushLocation(startMarker);
    return finishNode(ast.assignmentStatement(targets, values));
  }

  if (isCallExpression(target)) {
    pushLocation(startMarker);
    return finishNode(ast.callStatement(target));
  }

  // The prefix expression was neither part of an assignment or a
  // callstatement, however as it was valid it's been consumed, so raise
  // the exception on the first token to provide a helpful message.
  return unexpected(firstToken);
}
// ### Non-statements | |
// Parse a single name and return its identifier node. A current token that
// is not an identifier is reported with `raiseUnexpectedToken()`.
//
//     Identifier ::= Name
function parseIdentifier() {
  var name;

  markLocation();
  name = token.value;
  if (token.type !== Identifier) raiseUnexpectedToken('<name>', token);
  next();

  return finishNode(ast.identifier(name));
}
// Parse a function's parameter list and body block. The name has already
// been parsed and is passed in, which lets callers handle named and local
// functions before delegating here.
//
// `isLocal` is stored on the node for local functions; a missing or falsy
// value is stored as `false`. With `options.scope` set, the parameters are
// declared in the current scope and that scope is destroyed after the body.
//
//     funcdecl ::= '(' [parlist] ')' block 'end'
//     parlist ::= Name {',' Name} | [',' '...'] | '...'
function parseFunctionDeclaration(name, isLocal) {
  var parameters = [];
  var parameter;
  var expectMore;
  var statements;

  expect('(');

  // The declaration has arguments
  if (!consume(')')) {
    // Arguments are a comma separated list of identifiers, optionally ending
    // with a vararg.
    do {
      expectMore = false;
      if (token.type === Identifier) {
        parameter = parseIdentifier();
        // Function parameters are local.
        if (options.scope) scopeIdentifier(parameter);
        parameters.push(parameter);
        expectMore = consume(',');
      } else if (token.type === VarargLiteral) {
        // No arguments are allowed after a vararg.
        parameters.push(parsePrimaryExpression());
      } else {
        raiseUnexpectedToken('<name> or \'...\'', token);
      }
    } while (expectMore);
    expect(')');
  }

  statements = parseBlock();
  expect(LKW['end']);
  if (options.scope) destroyScope();

  return finishNode(ast.functionStatement(name, parameters, isLocal || false, statements));
}
// Parse the name of a function statement as an identifier optionally
// followed by member accesses, and return the resulting node.
//
// With `options.scope` set, the base identifier is marked local or global
// depending on the current scope, a new scope is created for the function,
// and a method name (`:`) declares `self` in that scope.
//
//     Name {'.' Name} [':' Name]
function parseFunctionName() {
  var startMarker;
  var path;

  // Wrap the path parsed so far in a member expression using the next name.
  function parseMember(indexer) {
    pushLocation(startMarker);
    return finishNode(ast.memberExpression(path, indexer, parseIdentifier()));
  }

  if (trackLocations) startMarker = createLocationMarker();
  path = parseIdentifier();

  if (options.scope) {
    attachScope(path, scopeHasName(path.name));
    createScope();
  }

  while (consume('.')) {
    path = parseMember('.');
  }

  if (consume(':')) {
    path = parseMember(':');
    if (options.scope) scopeIdentifierName('self');
  }

  return path;
}
// Parse the fields of a table constructor up to and including the closing
// `}`, and return the table node.
//
// Separators and the `=` of a named field are only recognised when they are
// punctuator tokens, so a string literal that merely has the same content
// (`{ 1 "" }`, `{ f "=" }`) is never mistaken for one.
//
//     tableconstructor ::= '{' [fieldlist] '}'
//     fieldlist ::= field {fieldsep field} fieldsep
//     field ::= '[' exp ']' '=' exp | Name = 'exp' | exp
//
//     fieldsep ::= ',' | ';'
function parseTableConstructor() {
  var fields = []
    , key, value;

  while (true) {
    markLocation();
    if (Punctuator === token.type && consume('[')) {
      key = parseExpectedExpression();
      expect(']');
      expect('=');
      value = parseExpectedExpression();
      fields.push(finishNode(ast.tableKey(key, value)));
    } else if (Identifier === token.type) {
      // `Name = exp` only when the name is followed by the `=` punctuator;
      // otherwise the identifier starts an ordinary value expression.
      if (Punctuator === lookahead.type && '=' === lookahead.value) {
        key = parseIdentifier();
        next();
        value = parseExpectedExpression();
        fields.push(finishNode(ast.tableKeyString(key, value)));
      } else {
        value = parseExpectedExpression();
        fields.push(finishNode(ast.tableValue(value)));
      }
    } else {
      if (null == (value = parseExpression())) {
        // No field followed, so discard the marker pushed for it above.
        if (trackLocations) locations.pop();
        break;
      }
      fields.push(finishNode(ast.tableValue(value)));
    }
    if (Punctuator === token.type && (',' === token.value || ';' === token.value)) {
      next();
      continue;
    }
    break;
  }
  expect('}');
  return finishNode(ast.tableConstructorExpression(fields));
}
// Expression parser | |
// ----------------- | |
// | |
// Parse an expression, starting from the lowest precedence level. If
// nothing is matched `parseSubExpression()`'s null result is returned as is.
//
//     exp ::= (unop exp | primary | prefixexp ) { binop exp }
//
//     primary ::= nil | false | true | Number | String | '...'
//          | functiondef | tableconstructor
//
//     prefixexp ::= (Name | '(' exp ')' ) { '[' exp ']'
//          | '.' Name | ':' Name args | args }
//
function parseExpression() {
  return parseSubExpression(0);
}
// Parse an expression that is required at this point. A missing expression
// is reported with `raiseUnexpectedToken()` at the current token.
function parseExpectedExpression() {
  var parsed = parseExpression();
  if (parsed != null) return parsed;
  raiseUnexpectedToken('<expression>', token);
}
// Return the precedence priority of a binary operator, or 0 when the given
// text is not a binary operator.
//
// As unary `-` can't be distinguished from binary `-`, unary precedence
// isn't described in this table but in `parseSubExpression()` itself.
//
// Punctuators are told apart by length and first character code because
// this function gets hit on every expression. The keyword operators are
// compared against `LKW` as whole words, since in a translated language
// neither their length nor their first letter is known in advance.
function binaryPrecedence(operator) {
  var charCode = operator.charCodeAt(0)
    , length = operator.length;

  if (1 === length) {
    switch (charCode) {
      case 94: return 12; // ^
      case 42: case 47: case 37: return 10; // * / %
      case 43: case 45: return 9; // + -
      case 38: return 6; // &
      case 126: return 5; // ~
      case 124: return 4; // |
      case 60: case 62: return 3; // < >
    }
  } else if (2 === length) {
    switch (charCode) {
      case 47: return 10; // //
      case 46: return 8; // ..
      case 60: case 62:
        if ('<<' === operator || '>>' === operator) return 7; // << >>
        return 3; // <= >=
      case 61: case 126: return 3; // == ~=
    }
  }

  if (LKW['and'] === operator) return 2;
  if (LKW['or'] === operator) return 1;
  return 0;
}
// Operator-precedence parser for expressions. Parses one operand (a unary
// expression, a primary expression or a prefix expression) and then folds in
// binary operators for as long as their precedence is above `minPrecedence`.
// Returns the expression node, or `null` when no operand could be parsed.
//
// This algorithm is compact and fast, and Lua core uses the same one, which
// gives confidence that expressions are parsed in the same manner.
//
//     exp ::= (unop exp | primary | prefixexp ) { binop exp }
function parseSubExpression(minPrecedence) {
  var operator = token.value;
  var left;
  var right;
  var operand;
  var startMarker;
  var precedence;

  if (trackLocations) startMarker = createLocationMarker();

  // UnaryExpression
  if (isUnary(token)) {
    markLocation();
    next();
    // The operand of a unary operator is parsed at precedence 10.
    operand = parseSubExpression(10);
    if (operand == null) raiseUnexpectedToken('<expression>', token);
    left = finishNode(ast.unaryExpression(operator, operand));
  }

  if (left == null) {
    // PrimaryExpression
    left = parsePrimaryExpression();

    // PrefixExpression
    if (left == null) {
      left = parsePrefixExpression();
    }
  }

  // This is not a valid left hand expression.
  if (left == null) return null;

  for (;;) {
    operator = token.value;

    // Only punctuators and keywords can be binary operators.
    if (token.type === Punctuator || token.type === Keyword) {
      precedence = binaryPrecedence(operator);
    } else {
      precedence = 0;
    }

    if (precedence === 0 || precedence <= minPrecedence) break;

    // Right-hand precedence operators
    if (operator === '^' || operator === '..') precedence -= 1;

    next();
    right = parseSubExpression(precedence);
    if (right == null) raiseUnexpectedToken('<expression>', token);

    // Push in the marker created before the loop to wrap its entirety.
    if (trackLocations) locations.push(startMarker);
    left = finishNode(ast.binaryExpression(operator, left, right));
  }

  return left;
}
//     prefixexp ::= prefix {suffix}
//     prefix ::= Name | '(' exp ')'
//     suffix ::= '[' exp ']' | '.' Name | ':' Name args | args
//
//     args ::= '(' [explist] ')' | tableconstructor | String
//
// Parses a prefix followed by any number of suffixes. Returns the resulting
// node, or null when the current token is neither an identifier nor '('.
function parsePrefixExpression() {
  var base;
  var marker;

  if (trackLocations) marker = createLocationMarker();

  // The prefix
  if (Identifier === token.type) {
    var name = token.value;
    base = parseIdentifier();
    // Set the parent scope.
    if (options.scope) attachScope(base, scopeHasName(name));
  } else if (consume('(')) {
    base = parseExpectedExpression();
    expect(')');
    base.inParens = true; // XXX: quick and dirty. needed for validateVar
  } else {
    return null;
  }

  // The suffixes: keep wrapping `base` until a token that cannot continue the
  // expression shows up.
  for (;;) {
    // A string literal directly after the expression is a call argument.
    if (StringLiteral === token.type) {
      pushLocation(marker);
      base = parseCallExpression(base);
      continue;
    }

    if (Punctuator !== token.type) return base;

    var punctuator = token.value;
    if ('[' === punctuator) {
      pushLocation(marker);
      next();
      var index = parseExpectedExpression();
      expect(']');
      base = finishNode(ast.indexExpression(base, index));
    } else if ('.' === punctuator) {
      pushLocation(marker);
      next();
      var member = parseIdentifier();
      base = finishNode(ast.memberExpression(base, '.', member));
    } else if (':' === punctuator) {
      pushLocation(marker);
      next();
      var method = parseIdentifier();
      base = finishNode(ast.memberExpression(base, ':', method));
      // A ':' member must be followed by call arguments; parseCallExpression
      // raises an error otherwise.
      pushLocation(marker);
      base = parseCallExpression(base);
    } else if ('(' === punctuator || '{' === punctuator) {
      // args
      pushLocation(marker);
      base = parseCallExpression(base);
    } else {
      return base;
    }
  }
}
//     args ::= '(' [explist] ')' | tableconstructor | String
//
// Parses the arguments of a call whose callee is `base` and returns the
// finished call node. Raises an unexpected-token error when the current
// token cannot start an argument list.
function parseCallExpression(base) {
  // f"string" / f'string' / f[[string]]
  if (StringLiteral === token.type) {
    return finishNode(ast.stringCallExpression(base, parsePrimaryExpression()));
  }

  if (Punctuator === token.type) {
    if ('(' === token.value) {
      // Without the emptyStatement feature, an opening parenthesis on a
      // different line than the previous token is reported as ambiguous.
      if (!features.emptyStatement && token.line !== previousToken.line) {
        raise({}, errors.ambiguousSyntax, token.value);
      }
      next();

      // List of expressions; the first one is optional.
      var args = [];
      var argument = parseExpression();
      if (null != argument) args.push(argument);
      while (consume(',')) {
        args.push(parseExpectedExpression());
      }

      expect(')');
      return finishNode(ast.callExpression(base, args));
    }

    if ('{' === token.value) {
      markLocation();
      next();
      var table = parseTableConstructor();
      return finishNode(ast.tableCallExpression(base, table));
    }
  }

  raiseUnexpectedToken('function arguments', token);
}
//     primary ::= String | Numeric | nil | true | false
//          | functiondef | tableconstructor | '...'
//
// Returns the parsed node. When the current token does not start a primary
// expression nothing is consumed and the result is undefined, which callers
// test with `null ==`.
function parsePrimaryExpression() {
  // Token types are bit flags, so a single mask covers every literal kind.
  var literalMask = StringLiteral | NumericLiteral | BooleanLiteral | NilLiteral | VarargLiteral;
  var type = token.type;
  var value = token.value;
  var marker;

  if (trackLocations) marker = createLocationMarker();

  if (type & literalMask) {
    pushLocation(marker);
    // Keep the literal exactly as it was written in the source.
    var raw = input.slice(token.range[0], token.range[1]);
    next();
    return finishNode(ast.literal(type, value, raw));
  }

  // Anonymous function definition.
  if (Keyword === type && LKW['function'] === value) {
    pushLocation(marker);
    next();
    if (options.scope) createScope();
    return parseFunctionDeclaration(null);
  }

  if (consume('{')) {
    pushLocation(marker);
    return parseTableConstructor();
  }
}
// Parser
// ------
// Export the main parser.
//
// - `wait` Hold parsing until end() is called. Defaults to false.
// - `comments` Store comments. Defaults to true.
// - `scope` Track identifier scope. Defaults to false.
// - `locations` Store location information. Defaults to false.
// - `ranges` Store the start and end character locations. Defaults to
//   false.
// - `luaVersion` The Lua version to parse as: '5.1', '5.2', '5.3' or
//   'LuaJIT'. An unsupported version makes parse() throw.
// - `onCreateNode` Callback which will be invoked when a syntax node is
//   created.
// - `onCreateScope` Callback which will be invoked when a new scope is
//   created.
// - `onDestroyScope` Callback which will be invoked when the current scope
//   is destroyed.
//
// Example:
//
//     var parser = require('luaparse');
//     parser.parse('i = 0');
exports.parse = parse; | |
// Feature flags per supported Lua version. parse() selects one of these
// tables through `options.luaVersion`; a flag that is absent from a table
// reads as undefined, i.e. the feature is off.
var versionFeatures = (function () {
  // Build a table in which every listed flag is set to true.
  function enable(flagNames) {
    var table = {};
    for (var i = 0; i < flagNames.length; i++) {
      table[flagNames[i]] = true;
    }
    return table;
  }

  return {
    '5.1': enable([]),
    '5.2': enable([
      'labels',
      'emptyStatement',
      'hexEscapes',
      'skipWhitespaceEscape',
      'strictEscapes',
      'relaxedBreak'
    ]),
    '5.3': enable([
      'labels',
      'emptyStatement',
      'hexEscapes',
      'skipWhitespaceEscape',
      'strictEscapes',
      'unicodeEscapes',
      'bitwiseOperators',
      'integerDivision',
      'relaxedBreak'
    ]),
    // XXX: LuaJIT language features may depend on compilation options; may need to
    // rethink how to handle this. Specifically, there is a LUAJIT_ENABLE_LUA52COMPAT
    // that removes contextual goto. Maybe add 'LuaJIT-5.2compat' as well?
    'LuaJIT': enable([
      'labels',
      'contextualGoto',
      'hexEscapes',
      'skipWhitespaceEscape',
      'strictEscapes',
      'unicodeEscapes'
    ])
  };
}());
// Reset the parser state for a new source and, unless `options.wait` is set,
// parse it right away.
//
// - `_input`   The Lua source. May be omitted, in which case an options
//              object can be passed as the only argument.
// - `_options` Parser options; the effective options are
//              extend(defaultOptions, _options).
//
// Returns the result of end() (the parsed chunk), or `exports` when
// `options.wait` is set so that write()/end() can be called afterwards.
// Throws an Error when `options.luaVersion` is not a key of versionFeatures.
function parse(_input, _options) {
  // Support the parse(options) call form.
  if ('undefined' === typeof _options && 'object' === typeof _input) {
    _options = _input;
    _input = undefined;
  }
  if (!_options) _options = {};

  input = _input || '';
  options = extend(defaultOptions, _options);

  // Rewind the lexer
  index = 0;
  line = 1;
  lineStart = 0;
  length = input.length;
  // When tracking identifier scope, initialize with an empty scope.
  scopes = [[]];
  scopeDepth = 0;
  globals = [];
  locations = [];

  // Use an own-property check rather than a plain lookup: inherited names
  // such as 'constructor' or 'toString' would otherwise resolve to truthy
  // Object.prototype members and be accepted as a Lua version.
  if (!Object.prototype.hasOwnProperty.call(versionFeatures, options.luaVersion)) {
    throw new Error(sprintf("Lua version '%1' not supported", options.luaVersion));
  }
  features = versionFeatures[options.luaVersion];

  if (options.comments) comments = [];
  if (!options.wait) return end();
  return exports;
}
// Append `_input` (converted to a string) to the source buffer without
// starting the parse. Returns `exports` so calls can be chained.
exports.write = write;
function write(_input) {
  var chunkText = String(_input);
  input = input + chunkText;
  length = input.length;
  return exports;
}
// Signal the end of the input and parse everything buffered so far.
//
// `_input`, when given, is appended to the buffer first. Returns the chunk
// produced by parseChunk(), with `comments` and `globals` attached when the
// matching options are enabled.
exports.end = end;
function end(_input) {
  if (typeof _input !== 'undefined') write(_input);

  // Ignore shebangs: the first line is overwritten with one space per
  // character, so the length of the buffer stays the same.
  if (input && input.substr(0, 2) === '#!') {
    input = input.replace(/^.*/, function (shebang) {
      return shebang.replace(/./g, ' ');
    });
  }

  length = input.length;
  trackLocations = options.locations || options.ranges;

  // Prime the lexer with a lookahead token before parsing.
  lookahead = lex();
  var chunk = parseChunk();

  if (options.comments) chunk.comments = comments;
  if (options.scope) chunk.globals = globals;

  // Every location marker pushed during parsing should have been consumed.
  /* istanbul ignore if */
  if (locations.length > 0) {
    throw new Error('Location tracking failed. This is most likely a bug in luaparse');
  }

  return chunk;
}
})); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment