Created
April 11, 2014 14:00
-
-
Save seriyps/10471310 to your computer and use it in GitHub Desktop.
Lisp subset interpreter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
'''
Created on 2014-04-05

@author: Sergey Prokhorov <[email protected]>
'''
import re | |
import operator | |
import decimal | |
class LexerError(Exception):
    """Raised when no lexer rule matches at the current position.

    line -- the full input line being scanned.
    pos  -- 0-based offset in `line` where scanning got stuck.
    """

    def __init__(self, line, pos):
        super(LexerError, self).__init__(line, pos)
        self.line = line
        self.pos = pos

    def __unicode__(self):
        # Show a small window of context (up to 5 chars either side).
        l = max(self.pos - 5, 0)
        return u"Scan error near: '{0}' position={1}".format(
            self.line[l:self.pos + 5], self.pos)

    # Python 3 ignores __unicode__; alias it so str(exc) stays readable
    # there too (backward-compatible on Python 2).
    __str__ = __unicode__
class ParserError(Exception):
    """Raised on malformed token sequences during parsing.

    msg        -- human-readable description of the problem.
    token_type -- the T_* token constant that triggered the error.
    re_match   -- the re.Match object for the offending token.
    state      -- parser state (S_LP / S_OP) at the time of the error.
    """

    def __init__(self, msg, token_type, re_match, state):
        super(ParserError, self).__init__(msg, token_type, re_match, state)
        self.msg = msg
        self.token_type = token_type
        self.re_match = re_match
        self.state = state

    def __unicode__(self):
        # NOTE(review): re_match.pos is where the match *search* started,
        # not necessarily the token's start offset (re_match.start());
        # kept as-is to preserve the original message.
        return u"Parser error: {0} position={1} state={2}".format(
            self.msg, self.re_match.pos, self.state)

    # Python 3 ignores __unicode__; alias it so str(exc) stays readable
    # there too (backward-compatible on Python 2).
    __str__ = __unicode__
def Lexer(rules):
    """Compile (regexp, token_type) rules into a tokenizer.

    Returns lex(line): a generator yielding (re.Match, token_type)
    pairs; at each position the first matching rule wins, so rule
    order matters.  Raises LexerError when no rule matches.
    """
    prepared = [(re.compile(regexp), token_type)
                for regexp, token_type in rules]

    def lex(line):
        ll = len(line)
        i = 0
        while i < ll:
            for pattern, token_type in prepared:
                match = pattern.match(line, i)
                if match is not None:
                    i = match.end()
                    yield (match, token_type)
                    break
            else:
                # No rule matched here.  (The original tested the leaked
                # loop variable `match` after the for, which raised an
                # unbound-local NameError when `rules` was empty.)
                raise LexerError(line, i)
    return lex
# Token type identifiers produced by the lexer.  T_SP (whitespace) is
# emitted by the lexer but skipped by the parser.
(T_LP, T_RP, T_PLUS, T_MINUS, T_MUL, T_DIV, T_GT, T_LT, T_GTE, T_LTE, T_EQ,
T_IF, T_VAR, T_NUM, T_SP) = range(15)
# Lexer rules, tried in order: multi-char operators (>=, <=, ==) must
# precede their single-char prefixes (>, <), and the 'if' keyword must
# precede the variable rule so it is not consumed as a T_VAR.
RULES = [
    (r"\s+", T_SP),
    (r"\(", T_LP),
    (r"\)", T_RP),
    (r"\+", T_PLUS),
    (r"\-", T_MINUS),
    (r"\*", T_MUL),
    (r"\/", T_DIV),
    (r">=", T_GTE),
    (r"<=", T_LTE),
    (r">", T_GT),
    (r"<", T_LT),
    (r"==", T_EQ),
    # NOTE(review): 'if' also matches the prefix of identifiers such as
    # 'iffy' (tokenized as T_IF + T_VAR "fy") -- presumably inputs avoid
    # such names; confirm if arbitrary identifiers must be supported.
    (r"if", T_IF),
    (r"([a-z]+)", T_VAR),
    (r"([0-9]+(\.[0-9]+)?)", T_NUM),
]
# Module-level tokenizer built from the rules above.
lexer = Lexer(RULES)
class Literal(object):
    """AST leaf holding a constant value; eval ignores the context."""

    def __init__(self, val):
        self.val = val

    def eval(self, ctx):
        # The context is accepted only for interface uniformity with
        # the other node types.
        return self.val
class SExp(object):
    """AST node for an s-expression: a function applied to child nodes.

    fun  -- callable implementing the operator (see OP2FUN).
    args -- child AST nodes, each exposing eval(ctx).
    """
    __slots__ = ('fun', 'args')

    def __init__(self, fun, args):
        self.fun = fun
        self.args = args

    def eval(self, ctx):
        # Evaluate operands left to right, then apply the operator.
        operands = [node.eval(ctx) for node in self.args]
        return self.fun(*operands)
class Variable(object):
    """AST leaf naming a variable, resolved against the eval context."""

    def __init__(self, name):
        self.name = name

    def eval(self, ctx):
        # Propagates KeyError if the name is unbound in ctx.
        value = ctx[self.name]
        return value
# Map operator tokens to the callables implementing them.
# operator.truediv replaces the Python-2-only operator.div (removed in
# Python 3); operands here are Decimal, for which classic and true
# division are identical, so behavior is unchanged on Python 2.
OP2FUN = {
    T_PLUS: operator.add,
    T_MINUS: operator.sub,
    T_MUL: operator.mul,
    T_DIV: operator.truediv,
    T_GTE: operator.ge,
    T_LTE: operator.le,
    T_GT: operator.gt,
    T_LT: operator.lt,
    T_EQ: operator.eq,
    # Eager 'if': both branches are evaluated before one is selected.
    T_IF: lambda cond, then, else_: then if cond else else_
}
# Parser states: S_LP -- a '(' was just consumed, an operator token is
# required next; S_OP -- reading operands (also the initial state).
S_LP, S_OP = range(2)
def parse(tokens):
    """Build an AST from the (re.Match, token_type) token stream.

    Returns the root node; call .eval(ctx) on it with a dict mapping
    variable names to values to evaluate the program.
    Raises ParserError on malformed input.
    """
    state = S_OP
    # Sentinel frame collects the top-level expression; its fun unwraps
    # the single collected argument when the root is evaluated.
    stack = [SExp(lambda a: a, [])]
    for m, t in tokens:
        if t == T_SP:
            continue  # whitespace is insignificant
        if state is S_LP:
            # Directly after '(' only an operator token is legal.
            if t in OP2FUN:
                stack[-1].fun = OP2FUN[t]
                state = S_OP
                continue
            raise ParserError("Need operator after '('", t, m, state)
        elif state is S_OP:
            if t == T_NUM:
                stack[-1].args.append(Literal(decimal.Decimal(m.group(1))))
                continue
            elif t == T_VAR:
                stack[-1].args.append(Variable(m.group(1)))
                continue
            elif t == T_RP:
                # Guard: a ')' with only the sentinel frame left used to
                # pop the sentinel and crash with IndexError on stack[-1];
                # report it as a parse error instead.
                if len(stack) == 1:
                    raise ParserError("Unbalanced parenthesis", t, m, state)
                sexp = stack.pop()
                stack[-1].args.append(sexp)
                continue
            elif t == T_LP:
                stack.append(SExp(None, []))  # fun is filled in S_LP state
                state = S_LP
                continue
        raise ParserError("Invalid token", t, m, state)
    if len(stack) != 1:
        # Unclosed '(' frames remain on the stack.
        raise ParserError("Unbalanced parenthesis", t, m, state)
    return stack[0]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment