Created
August 12, 2015 20:50
-
-
Save thomasballinger/a45a94dd4cee534d3033 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
def tokenize(s): | |
""" | |
>>> tokenize("(1.1 + 2 * 3)") | |
['(', '1.1', '+', '2', '*', '3', ')'] | |
""" | |
return re.findall("[0123456789.a-zA-Z]+|[*+/\-()=]", s) | |
""" | |
Statement -> Print | Goto | Assignment | |
Print -> PRINT Expr | |
Expr -> BinOpExpr | Number | Identifier | |
Assign -> Ident '=' Expr | |
BinOpExpr -> Ident Op Expr | |
Num -> \d+ | |
Ident -> \w+ | |
Op -> [+-*/] | |
10 PRINT HI | |
20 GOTO 10 | |
30 A = 2 + 3 - 4 | |
parse functions take a string and an index | |
and return a node or None and an index | |
""" | |
def parseStatement(s, i): | |
""" | |
>>> toks = tokenize('A = 2 + 3 - 4') | |
>>> toks | |
['A', '=', '2', '+', '3', '-', '4'] | |
>>> parseStatement(toks, 0) | |
(['=', 'A', ['+', 2, ['-', 3, 4]]], 7) | |
""" | |
node, i = parsePrint(s, i) | |
if node is not None: | |
return node, i | |
node, i = parseGoto(s, i) | |
if node is not None: | |
return node, i | |
node, i = parseAssignment(s, i) | |
if node is not None: | |
return node, i | |
return None, i | |
def parseAssignment(s, orig_i): | |
""" | |
>>> parseAssignment(['A', '='], 0) | |
(None, 0) | |
>>> parseAssignment(['A', '=', '1'], 0) | |
(['=', 'A', 1], 3) | |
""" | |
i = orig_i | |
if len(s) - i < 3: | |
return None, orig_i | |
ident, i = parseIdentifier(s, i) | |
if ident is None: | |
return None, orig_i | |
if s[i] != '=': | |
return None, orig_i | |
expr, i = parseExpression(s, i+1) | |
if expr is None: | |
return None, orig_i | |
return ['=', ident, expr], i | |
def parseIdentifier(s, i): | |
""" | |
>>> parseIdentifier(['asdf'], 0) | |
('asdf', 1) | |
""" | |
if re.match(r'\w+', s[i]): | |
return s[i], i+1 | |
return None, i | |
def parseExpression(s, i): | |
""" | |
>>> parseExpression(['asdf'], 0) | |
('asdf', 1) | |
>>> parseExpression(['123'], 0) | |
(123, 1) | |
>>> parseExpression(['123', '+', '45'], 0) | |
(['+', 123, 45], 3) | |
""" | |
binop, i = parseBinOpExpr(s, i) | |
if binop: | |
return binop, i | |
thing, i = parseNumOrIdent(s, i) | |
if thing is not None: | |
return thing, i | |
return None, i | |
def parseNumOrIdent(s, i): | |
""" | |
>>> parseNumOrIdent(['123'], 0) | |
(123, 1) | |
""" | |
num, i = parseNum(s, i) | |
if num is not None: | |
return num, i | |
ident, i = parseIdentifier(s, i) | |
if ident: | |
return ident, i | |
return None, i | |
def parseBinOpExpr(s, orig_i): | |
""" | |
>>> parseBinOpExpr(['hi', '+', '3'], 0) | |
(['+', 'hi', 3], 3) | |
""" | |
if len(s) - orig_i < 3: | |
return None, orig_i | |
i = orig_i | |
thing, i = parseNumOrIdent(s, i) | |
if thing is None: | |
return None, orig_i | |
op, i = parseBinOp(s, i) | |
if op is None: | |
return None, orig_i | |
expr, i = parseExpression(s, i) | |
if expr is None: | |
return None, orig_i | |
return [op, thing, expr], i | |
def parseBinOp(s, i): | |
""" | |
>>> parseBinOp(['there', '+', 'hi'], 1) | |
('+', 2) | |
""" | |
if re.match(r'[+-/*]', s[i]): | |
return s[i], i+1 | |
return None, i | |
def parseNum(s, i): | |
""" | |
>>> parseNum(['123', 'qwerty'], 0) | |
(123, 1) | |
""" | |
if re.match(r'\d+', s[i]): | |
return int(s[i]), i+1 | |
return None, i | |
def parsePrint(s, i): | |
""" | |
>>> parsePrint(["PRINT", 1], 0) | |
('print', 1) | |
""" | |
if s[i].lower() == 'print': | |
return 'print', i+1 | |
return None, i | |
def parseGoto(s, i): | |
""" | |
>>> parseGoto(["GOTO", 1], 0) | |
('goto', 1) | |
""" | |
if s[i].lower() == 'goto': | |
return 'goto', i+1 | |
return None, i | |
if __name__ == '__main__': | |
import doctest | |
doctest.testmod() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment