Last active
May 13, 2018 11:01
-
-
Save gatopeich/3ad77fc64f29c2adcea560cd757b59fb to your computer and use it in GitHub Desktop.
gatopeich's prototype of a minimal Javascript interpreter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# gatopeich's minimal Javascript interpreter prototype
# Coding this as a prototype for a specific-purpose lightweight Javascript
# engine in C/C++.
# DONE: Tokenizer (except quote parsing)
# DONE: Expression extraction
# DONE: Pretty printer
# next: Interpreter...
# Every punctuator the tokenizer recognises, as listed in
# https://www.ecma-international.org/ecma-262/5.1/#sec-7.7
Punctuators = (
    '{', '}', '(', ')', '[', ']',
    '.', ';', ',', '<', '>', '<=',
    '>=', '==', '!=', '===', '!==',
    '+', '-', '*', '%', '++', '--',
    '<<', '>>', '>>>', '&', '|', '^',
    '!', '~', '&&', '||', '?', ':',
    '=', '+=', '-=', '*=', '%=', '<<=',
    '>>=', '>>>=', '&=', '|=', '^=',
    '/', '/=',  # := DivPunctuators
)
# Sort longer first, so that a multi-character punctuator is always tried
# before any shorter punctuator it contains (e.g. '>>>=' before '>>').
# The key is a (length, text) tuple: the length is compared as a number
# rather than as text, and the text makes the order deterministic.
Punctuators = sorted(Punctuators, key=lambda p: (len(p), p), reverse=True)
def tokenize(txt):
    """Split Javascript source text into a flat list of token strings.

    The text is processed line by line:

    1. Each line is stripped and everything from the first '//' onwards is
       discarded. Quote handling is not implemented yet, so a '//' inside
       a string literal is cut as well.
    2. The line is split on whitespace and each word is further split
       around the punctuators it contains (longest punctuators first).
    3. A ';' token is injected in front of every non-empty line unless the
       line starts with a token that may continue the previous expression.

    Args:
        txt: Javascript source code as a single string.

    Returns:
        A list of token strings (words, punctuators and injected ';').
    """
    # Tokens that, at the start of a line, continue the previous line
    # instead of starting a new expression: open parenthesis "(",
    # open bracket "[", slash "/", plus "+", and minus "-".
    continuation_starts = ('(', '[', '/', '+', '-')

    # 1. Remove comments
    lines = (line.strip() for line in txt.splitlines())
    lines = (line.partition('//')[0] for line in lines)

    # 2. Process quotations
    #    (TBD)

    def divide_in_tokens(word):
        """Recursively split a whitespace-free word around its punctuators."""
        if not word:
            return []
        for p in Punctuators:
            if p in word:
                # Split at the first occurrence; both sides may still
                # contain further punctuators.
                before, p, after = word.partition(p)
                return divide_in_tokens(before) + [p] + divide_in_tokens(after)
        return [word]

    # 3. Lines to tokens
    tokens = []
    for line in lines:
        as_tokens = []
        for word in line.split():
            as_tokens.extend(divide_in_tokens(word))
        # Inject initial semicolon where adequate: prepend ';' on any new
        # line that does NOT start with one of the continuation tokens.
        if as_tokens and as_tokens[0] not in continuation_starts:
            tokens.append(';')
        tokens.extend(as_tokens)
    return tokens
# Maps every opening bracket token to its matching closing token.
PARENS = {'{': '}', '(': ')', '[': ']'}


def to_expressions(tokens, opening = None):
    """Group a flat token list into a nested list of expressions.

    Tokens are consumed from the front of `tokens` (the list is modified
    in place; recursive calls share it). A ';' ends the current
    expression, and an opening bracket starts a nested group that is
    stored as a single element of the current expression.

    Args:
        tokens: list of token strings; it is emptied as tokens are read.
        opening: the bracket token that opened the group being parsed, or
            None when parsing at top level.

    Returns:
        A list of expressions, each a list of token strings and nested
        groups. A nested group is a list whose first element is its
        opening bracket, followed by the expressions found inside it.

    Raises:
        Exception: if the tokens run out before `opening` is closed.
    """
    expressions = [opening] if opening else []
    closing = PARENS[opening] if opening else None
    current_exp = []
    while tokens:
        token = tokens.pop(0)
        if token == closing:
            # End of this group: flush whatever expression is pending.
            if current_exp:
                expressions.append(current_exp)
            return expressions
        elif token == ';':
            # Empty expressions (e.g. consecutive ';') are not recorded.
            if current_exp:
                expressions.append(current_exp)
            current_exp = []
        elif token in PARENS:
            current_exp.append(to_expressions(tokens, token))
        else:
            current_exp.append(token)
    if opening:
        raise Exception('No match for "%s"'%opening)
    # Input ended without a final ';': keep the last expression instead of
    # silently dropping it.
    if current_exp:
        expressions.append(current_exp)
    return expressions
# Sample Javascript program (variable scoping and closures) that is fed to
# tokenize(), to_expressions() and pretty_print() further down in this file.
# NOTE(review): the name suggests it was taken from Wikipedia - confirm.
WikipediaExample1='''
var x = 0; // A global variable, because it is not in any function
function f() {
var z = 'foxes', r = 'birds'; // 2 local variables
m = 'fish'; // global, because it wasn't declared anywhere before
function child() {
var r = 'monkeys'; // This variable is local and does not affect the "birds" r of the parent function.
z = 'penguins'; // Closure: Child function is able to access the variables of the parent function.
}
twenty = 20; // This variable is declared on the next line, but usable anywhere in the function, even before, as here
var twenty;
child();
return x; // We can use x here, because it is global
}
f();
console.log(z); // This line will raise a ReferenceError exception, because the value of z is no longer available
'''
class LineCounter():
    """Produces the numbered, indented prefix printed at the start of a line.

    Converting an instance to a string has a side effect: it increments
    the line count and returns the new number (right-aligned to three
    columns) followed by the current prefix.

    Attributes:
        prefix: text placed after the line number; grows and shrinks with
            indent() / unindent().
        lines: number of prefixes generated so far.
    """

    # One nesting level. indent() and unindent() must add and remove the
    # same amount, otherwise the prefix shrinks after every nested block.
    INDENT = '  '

    def __init__(self, prefix = ': ', lines = 0):
        self.prefix = prefix
        self.lines = lines

    def __str__(self):
        # Each conversion counts as one more printed line.
        self.lines += 1
        return '%3d%s'%(self.lines, self.prefix)

    def indent(self):
        """Add one nesting level to the prefix; returns self for chaining."""
        self.prefix += self.INDENT
        return self

    def unindent(self):
        """Remove one nesting level from the prefix; returns self for chaining."""
        self.prefix = self.prefix[:-len(self.INDENT)]
        return self
def pretty_print(expressions, linecount = None):
    """Print nested expressions with line numbers and indentation.

    Plain tokens are printed separated by spaces. A parenthesised group is
    printed inline between '( ' and ') '. Any other bracket group is
    printed as an indented block, with its closing bracket on a new line.

    Args:
        expressions: list of expressions; each expression is a list of
            token strings and nested groups (lists starting with their
            opening bracket).
        linecount: the LineCounter shared with recursive calls. When it is
            omitted this is the top-level call: a new counter is created,
            the first expression also starts on a fresh numbered line, and
            a final print('\\n') ends the output.
    """
    top_level = not linecount
    counter = linecount if linecount else LineCounter()
    # In a nested call the first expression continues the current line.
    needs_newline = top_level
    for expression in expressions:
        if needs_newline:
            print(end='\n%s' % counter)
        needs_newline = True
        for item in expression:
            if type(item) is str:
                print(item, end=' ')
                continue
            if item[0] == '(':
                print(end='( ')
                pretty_print(item[1:], counter)
                print(end=') ')
                continue
            # Block group: open it, print its content one level deeper,
            # then close it on a line of its own at the outer level.
            print(item[0], end='\n%s' % counter.indent())
            pretty_print(item[1:], counter)
            print('\n%s' % counter.unindent(), end=PARENS[item[0]])
    if top_level:
        print('\n')
# Demo 1: print every top-level expression of the first sample as a raw
# nested list (one per line, prefixed with '>'), then pretty-print it.
for exp in to_expressions(tokenize(WikipediaExample1)):
    print ('>', exp)
pretty_print(to_expressions(tokenize(WikipediaExample1)))

# Demo 2: a snippet without explicit semicolons, including a line that
# starts with '+', to exercise the semicolon injection done by tokenize().
CODE='''
// Sample Javascript snippet
a += 1
// The line below starts a new expression...
b = 2
// Unlike this one which is a continuation...
+ a
print( a + b )
'''
print (tokenize(CODE))
pretty_print(to_expressions(tokenize(CODE)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Current output: