Last active
March 24, 2018 18:52
-
-
Save thecere/a52e7a9ab9a3e3878c08 to your computer and use it in GitHub Desktop.
scite js/jsx lua lexer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Copyright 2006-2015 Mitchell mitchell.att.foicica.com. See LICENSE.
-- JavaScript LPeg lexer.
-- Lexer support library plus the LPeg constructors used throughout this file.
-- `lpeg` is taken from the global environment, as in the original.
local l = require('lexer')
local token = l.token
local word_match = l.word_match
local P = lpeg.P
local R = lpeg.R
local S = lpeg.S

-- Module table returned at the end of the file.
local M = {_NAME = 'javascript'}
-- Whitespace: one token per run of l.space characters.
local ws = token(l.WHITESPACE, l.space^1)

-- Comments.
-- Line form: '//' followed by any run of l.nonnewline_esc.
local line_comment = P('//') * l.nonnewline_esc^0
-- Block form: '/*' up to '*/'. The terminator is optional, so a block comment
-- that is still open at the end of the input is lexed as a comment as well.
local block_body = (l.any - P('*/'))^0
local block_comment = P('/*') * block_body * P('*/')^-1
local comment = token(l.COMMENT, line_comment + block_comment)
-- Strings.
-- Single-quoted, double-quoted and backtick (template) literals. Escape and
-- multi-line handling are whatever l.delimited_range does by default.
local sq_str = l.delimited_range("'")
local dq_str = l.delimited_range('"')
local xq_str = l.delimited_range('`')
-- Regex literal flags. The original accepted only i/g/m, so a literal such as
-- /x/gu was split into a regex followed by a stray identifier; accept every
-- flag letter JavaScript defines.
local regex_flags = S('dgimsuvy')^0
-- A '/' is only treated as the start of a regex literal when
-- l.last_char_includes accepts the preceding context for the listed
-- punctuation (presumably the last non-blank character; see lexer.lua).
-- The `true` argument to delimited_range is the library's single-line switch
-- (TODO confirm against the installed lexer.lua).
local regex_str = #P('/') * l.last_char_includes('+-*%^!=&|?:;,([{<>') *
                  l.delimited_range('/', true) * regex_flags
local simplestring = token(l.STRING, sq_str + dq_str) + token(l.REGEX, regex_str)
local templstring = token('string_template', xq_str) + token(l.REGEX, regex_str)
-- NOTE: from here on the local `string` shadows Lua's global `string` library
-- table inside this file. String *methods* (s:find etc.) keep working; direct
-- calls such as string.format(...) below this line would not.
local string = simplestring + templstring
-- Numbers.
-- Optional leading '-', one or more digits, an optional fraction ('.' followed
-- by zero or more digits) and an optional exponent.
-- The exponent used to be nested inside the fraction, so "1e5" was lexed as
-- the number "1" followed by the identifier "e5". It is now optional on its
-- own; every input the old pattern accepted is still accepted unchanged.
local digits = R('09')^1
local fraction = P('.') * R('09')^0
local exponent = S('Ee') * l.integer
local float_int = S('-')^-1 * digits * fraction^-1 * exponent^-1
local number = token(l.NUMBER, float_int)
-- Keywords.
-- Words highlighted as keywords. Besides reserved words, the list also
-- contains a few common names such as 'console', 'global' and 'arguments'.
local keyword_list = {
  'abstract', 'boolean', 'break', 'byte', 'case', 'catch', 'char',
  'class', 'const', 'continue', 'debugger', 'default', 'delete', 'do',
  'double', 'else', 'enum', 'export', 'extends', 'false', 'final',
  'finally', 'float', 'for', 'goto', 'if', 'implements', 'import', 'in',
  'instanceof', 'int', 'interface', 'let', 'long', 'native', 'new', 'null',
  'undefined', 'package', 'private', 'protected', 'public', 'return',
  'short', 'static', 'super', 'switch', 'synchronized', 'this', 'throw',
  'throws', 'transient', 'true', 'try', 'typeof', 'var', 'void',
  'volatile', 'while', 'with', 'yield', 'function', 'from', 'of', 'global',
  'console', 'constructor', 'async', 'await', 'as', 'arguments', 'eval',
}
local keyword = token(l.KEYWORD, word_match(keyword_list))
-- Identifiers.
-- Three tokens over the same l.word pattern; only the token name differs.
local identifier = token(l.IDENTIFIER, l.word)
local variable = token(l.VARIABLE, l.word)
-- A function name is a word at a position where `keyword` does not match
-- (LPeg defines `a - b` as `-b * a`).
local funcname = -keyword * token(l.FUNCTION, l.word)

-- Operators.
-- Wraps a pattern as an operator token.
local function op(patt)
  return token(l.OPERATOR, patt)
end
local operator = op(S('+-/*%^!=&|?:;,.()[]{}<>'))
local splat = op(P('...'))
-- NOTE: the next two names are misleading but are used by later patterns:
-- `colon` matches a comma and `double` matches a colon.
local colon = op(P(','))
local double = op(P(':'))
local opencurly = op(S('{'))
local closecurly = op(S('}'))
local openbracket = op(S('['))
local closebracket = op(S(']'))
-- Parameter lists, including destructuring:
--   let f([a, b, ...rest], c, d) => null
-- XXX: no recursive unpacking; missing separators (commas) are not understood.
local opt_ws = ws^0

-- One destructured name followed by any number of commas.
local unpack_array = opt_ws * variable * opt_ws * colon^0
-- Trailing "...rest" element.
local unpack_array_rest = opt_ws * splat * variable
-- Shared interior of `[ ... ]` and `{ ... }` destructuring.
local unpack_items = unpack_array^0 * unpack_array_rest^-1 * opt_ws
local unpack_array_parenth = openbracket * unpack_items * closebracket
local unpack_object_parenth = opencurly * unpack_items * closecurly
-- local sig_default_param = token(l.OPERATOR, S('=')) * ws^0 * (unpack_object_parenth + unpack_array_parenth)

-- Default values: `= scalar`, `= {key: value, ...}` or `= [value, ...]`.
local scalar = number + identifier + string
local dictentry = scalar * opt_ws * double * opt_ws * scalar
local assign = token(l.OPERATOR, S('='))
-- Builds "= <open> item, item, ... <close>" for a literal default value;
-- a trailing comma and an empty literal are both accepted.
local function literal_default(item, open, close)
  local items = item^1 * opt_ws * (colon * opt_ws * item)^0 * colon^-1
  return assign * opt_ws * open * opt_ws * items^0 * opt_ws * close
end
local sig_default_param0 = assign * opt_ws * scalar^1
local sig_default_param1 = literal_default(dictentry, opencurly, closecurly)
local sig_default_param2 = literal_default(scalar, openbracket, closebracket)
local sig_default_param = sig_default_param0 + sig_default_param1 + sig_default_param2

-- One parameter: a (possibly splatted) name or a destructuring form, then
-- optional default value(s) and any number of commas.
local param_target = splat^-1 * variable + unpack_array_parenth + unpack_object_parenth
local func_sig = opt_ws * param_target * opt_ws * sig_default_param^0 * opt_ws * colon^0
-- The whole parenthesised parameter list.
local func_sig_parenthesis = token(l.OPERATOR, S('(')) * func_sig^0 * opt_ws *
                             token(l.OPERATOR, S(')'))
-- `function` keyword in plain and generator form ("function" / "function*").
-- The generator star must be optional: it used to be mandatory, which meant
-- func1 and func2 below could only ever match generator functions.
local generator_star = ws^0 * token(l.OPERATOR, S('*'))
local func_keyword = token(l.KEYWORD, P('function')) * generator_star^-1
-- form: "function XXX()"
local func1 = func_keyword * ws^1 * funcname * ws^0 * func_sig_parenthesis
-- form: "XXX = function()" (also "XXX: function()" inside object literals)
local func2 = funcname * ws^0 * token(l.OPERATOR, S('=:')) * ws^0 * func_keyword * ws^0 * func_sig_parenthesis
-- in es6 class interface: "XXX() {}"
local func3 = funcname * ws^0 * func_sig_parenthesis * ws^0 * opencurly
-- accessor form: "get XXX() {}" / "set XXX(v) {}"
local get_set = token(l.KEYWORD, P('get') + P('set'))
local func3_get_set = get_set * ws^1 * func3
-- Arrow function head: "() =>", "x =>" or "(a, b) =>".
-- An empty "()" is colorized as a variable.
local lambda = (token(l.VARIABLE, P('()')) + variable + func_sig_parenthesis) * ws^0 * token(l.OPERATOR, S('=')) * token(l.OPERATOR, S('>'))
-- es6 form: XXX = () => asasd
local func4 = funcname * ws^0 * token(l.OPERATOR, S('=:')) * ws^0 * lambda
-- Match-time pattern that consumes the JavaScript embedded in a JSX
-- `{ ... }` attribute value. It is entered just after the opening '{' and
-- scans forward, counting nested braces, until the '}' that balances it.
--
-- @param input  the whole subject string being lexed
-- @param index  current match position (just after the opening '{')
-- @return the position of the balancing '}' (so that the brace itself is left
--   for the following pattern), or false when the braces never balance.
--
-- The original set the position to nil for unbalanced input and then evaluated
-- `idx - 1`, raising an "arithmetic on a nil value" error inside the lexer,
-- e.g. when only part of a file is handed to it. Failing the match lets the
-- remaining rules deal with the text instead.
-- Braces inside strings or comments of the embedded code are counted as well.
local in_jsx_attr = P(function(input, index)
  local depth = 1
  local pos = index
  while depth > 0 do
    -- Plain (non-pattern) searches for the next brace of each kind.
    local close_at = input:find('}', pos, true)
    if not close_at then
      -- Depth only decreases on '}', so without one it can never balance.
      return false
    end
    local open_at = input:find('{', pos, true)
    if open_at and open_at < close_at then
      depth = depth + 1
      pos = open_at + 1
    else
      depth = depth - 1
      pos = close_at + 1
    end
  end
  -- pos is one past the balancing '}'; step back so the brace is not consumed.
  return pos - 1
end)
-- Dirk: not sure whether this newline stuff is really needed
-- local newline = (P"\r\n" + P"\n\r" + S"\r\n")

-- Zero or more "optional whitespace, comment" pairs, then mandatory whitespace.
local ws_comment = (ws^0 * comment)^0 * ws
-- Tag name, optionally dotted (Foo.Bar.Baz).
local jsx_tag = l.word * (P('.') * l.word)^0
-- Attribute value holding embedded JavaScript: '{' <embedded> '}'.
local jsx_attr_js = token(l.OPERATOR, S('{')) * token('embedded', in_jsx_attr) *
                    token(l.OPERATOR, S('}'))
-- Tokenizing the embedded expression itself does not seem doable in one pass:
--~ local expression = lambda + keyword + identifier + number + simplestring + templstring + operator + ws
--~ local jsx_attr_js = token(l.OPERATOR, S('{')) * expression^1 * token(l.OPERATOR, S('}'))

-- name, optionally followed by = "string" or = {js}
local attr_value = string + jsx_attr_js
local jsx_attribute = token(l.LABEL, l.word) * (ws^0 * P('=') * ws^0 * attr_value)^0
local jsx_attributes = ws_comment * jsx_attribute
local jsx_closing_element = token(l.TYPE, P('</') * jsx_tag * P('>'))
-- Between opening and closing tags only the minimal {EMBEDDED_JS} body is
-- understood.
local minimal_body = ws^0 * jsx_attr_js * ws^0 * jsx_closing_element
-- End of an opening tag: '/>' or '>'.
local element_outro = ws_comment^0 * token(l.TYPE, P('/>') + P('>'))
-- End of input. Accepting it in place of the tag end helps if scite does not
-- feed all required lines into the lexer.
local EOF = P(-1)

-- 1: opening tag with optional final delimiter
--   stuff = <Stuff
--     ref={(e) => scroller_ref = e}
--     key={this.state.wcsize}
local jsx_open = token(l.TYPE, P('<') * jsx_tag)
local jsx_element_1 = jsx_open * jsx_attributes^0 *
                      (element_outro + ws_comment^0 * EOF) * minimal_body^-1
-- 2: only the trailing part of a tag
--     ref={(e) => scroller_ref = e}
--     key={this.state.wcsize}
--   />
local jsx_element_2 = jsx_attribute * jsx_attributes^0 * element_outro
-- es6 classes: the `class` keyword, whitespace, then the class name.
local class_keyword = token(l.KEYWORD, P('class'))
local classname = class_keyword * ws^1 * token(l.CLASS, l.word)

-- Lexer rules as {name, pattern} pairs. The composite function/JSX rules are
-- listed ahead of the generic keyword/identifier/operator rules.
M._rules = {
  {'whitespace', ws},
  -- function declarations and arrow functions
  {'func1', func1},
  {'func2', func2},
  {'func3', func3},
  {'func3_get_set', func3_get_set},
  {'func4', func4},
  {'lambda', lambda},
  {'class', classname},
  -- JSX
  {'jsx_element_1', jsx_element_1},
  {'jsx_element_2', jsx_element_2},
  {'jsx_closing_element', jsx_closing_element},
  -- generic tokens
  {'keyword', keyword},
  {'identifier', identifier},
  {'comment', comment},
  {'number', number},
  {'string', simplestring},
  {'templstring', templstring},
  {'operator', operator},
}
-- Style for the custom 'string_template' token emitted for backtick strings.
M._tokenstyles = {string_template = 'string_template'}

-- Folding: braces and block comments open/close a fold level; runs of '//'
-- comments are handled by l.fold_line_comments.
local fold_symbols = {_patterns = {'[{}]', '/%*', '%*/', '//'}}
fold_symbols[l.OPERATOR] = {['{'] = 1, ['}'] = -1}
fold_symbols[l.COMMENT] = {
  ['/*'] = 1,
  ['*/'] = -1,
  ['//'] = l.fold_line_comments('//'),
}
M._foldsymbols = fold_symbols

M._LEXBYLINE = false

return M
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment