Last active
December 27, 2015 14:09
-
-
Save orontee/7338913 to your computer and use it in GitHub Desktop.
Arithmetic expressions lexer and parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import operator | |
sym_tokens = {'unary_operators': (r'+', r'-'), | |
'binary_operators': (r'+', r'-', r'*', r'/'), | |
'grouping_open': (r'(',), | |
'grouping_close': (r')',)} | |
semantic = {'binary_operators': {r'+': operator.add, | |
r'-': operator.sub, | |
r'*': operator.mul, | |
r'/': operator.div}, | |
'unary_operators': {r'-': operator.neg}} | |
digits = (r'0', r'1', r'2', r'3', r'4', r'5', r'6', r'7', r'8', r'9') | |
class LexerException(Exception): | |
"""Base class for all lexer exceptions. | |
""" | |
pass | |
class ParserException(Exception): | |
"""Base class for parser exceptions. | |
""" | |
pass | |
class Lexer(object): | |
"""Syntax analyser for arithmetic expressions. | |
The lexical analysis is governed by the global variables | |
``sym_tokens`` and ``digits``. | |
A simple use case: | |
>>> l = Lexer() | |
>>> [t for t in l("2+433-(5*453.34)")] | |
[2.0, '+', 433.0, '-', '(', 5.0, '*', 453.34, ')'] | |
The LexerException is raised when the analysis fails: | |
>>> from lexarith import LexerException | |
>>> [t for t in l("5*pi/2")] | |
Traceback (most recent call last): | |
LexerException: Unexpected character: p | |
>>> l = Lexer(int) | |
>>> [t for t in l("5+3-12*2")] | |
[5, '+', 3, '-', 12, '*', 2] | |
""" | |
def __init__(self, ntype=float): | |
"""Initialize lexer. | |
""" | |
self.syms = [c for l in sym_tokens.values() for c in l] | |
self.ntype = ntype | |
def __call__(self, s): | |
"""Build a generator that performs a lexical analysis of the given | |
string. | |
""" | |
buff = None | |
for c in s: | |
if c in self.syms: | |
if buff is not None: | |
yield self.convert(buff) | |
buff = None | |
yield c | |
elif c in digits: | |
buff = (buff or '') + c | |
elif c in (r'.', r','): | |
if self.ntype != float: | |
raise LexerException('Unexpected character: {0}'.format(c)) | |
if buff is not None: | |
buff += c | |
else: | |
raise LexerException('Unexpected character: {0}'.format(c)) | |
if buff is not None: | |
yield self.convert(buff) | |
def convert(self, s): | |
"""Convert the ``s`` to the type ``self.ntype``. | |
""" | |
try: | |
n = self.ntype(s) | |
except ValueError: | |
raise LexerException('Float conversion failed: {0}'.format(s)) | |
return n | |
class Parser(object): | |
"""Parse tokens as returned by the lexer. | |
>>> l = Lexer(int) | |
>>> p = Parser(int) | |
>>> p([t for t in l("5+3-(12*2)")]) | |
-16 | |
""" | |
class Node(object): | |
"""Arithmetic node. | |
""" | |
def __init__(self, owner=None, value=None, parent=None): | |
"""Initialize an arithmetic node. | |
""" | |
if owner is not None: | |
self._owner = owner | |
self._parent = parent | |
self._children = [] | |
self._value = value | |
def __str__(self): | |
"""Convert to string. | |
""" | |
return 'Arithmetic Node: {0}'.format(self.value) | |
@property | |
def owner(self): | |
"""Get the owner. | |
""" | |
try: | |
return self._owner | |
except AttributeError: | |
return self.parent.owner if self.parent else None | |
@property | |
def ntype(self): | |
"""Get the number type. | |
The number type is read from the ``owner`` property. | |
""" | |
return self.owner.ntype if self.owner else None | |
def getparent(self): | |
return self._parent | |
def setparent(self, parent): | |
self._parent = parent | |
parent = property(getparent, setparent) | |
@property | |
def children(self): | |
"""Get the children. | |
""" | |
return self._children | |
def enlarge(self, value=None): | |
"""Append a new child. | |
""" | |
child = Parser.Node(parent=self, value=value) | |
self.children.append(child) | |
return child | |
def downgrade(self, value=None): | |
"""Return a new parent. | |
""" | |
node = Parser.Node(owner=self.owner, value=value, | |
parent=self.parent) | |
if self.parent is not None: | |
del self.parent.children[self.parent.children.index(self)] | |
node.children.append(self) | |
return node | |
def setvalue(self, value): | |
self._value = value | |
def getvalue(self): | |
return self._value | |
value = property(getvalue, setvalue) | |
def collapse(self): | |
"""Collapse the subtree rooted at this node. | |
Evaluate the expression represented by this node, update | |
``value`` property and remove the children. | |
""" | |
if len(self.children) == 0: | |
if isinstance(self.value, self.ntype): | |
pass | |
else: | |
raise ParserException('Unexpected value') | |
else: | |
if len(self.children) == 2: | |
if self.value in sym_tokens['binary_operators']: | |
op = semantic['binary_operators'][self.value] | |
elif len(self.children) == 1: | |
if self.value in sym_tokens['unary_operators']: | |
op = semantic['unary_operators'][self.value] | |
else: | |
raise ParserException('Unexpected number of operands') | |
args = [n.value for n in self.children] | |
self.value = op(*args) | |
self._children = [] | |
def __init__(self, ntype=float): | |
self._ntype = ntype | |
@property | |
def ntype(self): | |
"""Get the number type. | |
""" | |
return self._ntype | |
def __call__(self, tokens): | |
"""Build and evaluate the graph representing the arithmetic expression | |
associated to ``tokens``. | |
The value of the graph is returned. | |
""" | |
root, last = None, None | |
for t in tokens: | |
if isinstance(t, self.ntype): | |
if last is None: | |
last = Parser.Node(owner=self, value=t) | |
root = last | |
else: | |
last = last.enlarge(t) | |
elif t in sym_tokens['binary_operators']: | |
if isinstance(last.value, self.ntype): | |
if t in (r'*', r'/'): | |
obsolete = (root is last) | |
last = last.downgrade(t) | |
if obsolete: | |
root = last | |
else: | |
if last.parent is not None: | |
obsolete = (root is last.parent) | |
last = last.parent.downgrade(t) | |
if obsolete: | |
root = last | |
else: | |
obsolete = (root is last) | |
last = last.downgrade(t) | |
if obsolete: | |
root = last | |
else: | |
raise ParserException('Unexpected operator') | |
# elif t in sym_tokens['grouping_open']: | |
# group.append(node) | |
# elif t in sym_tokens['grouping_close']: | |
# try: | |
# node = group.pop() | |
# except IndexError: | |
# raise ParserException('Unbalanced parenthesis') | |
# root.collapse() | |
return root.value | |
if __name__ == '__main__': | |
parser = argparse.ArgumentParser( | |
description='Arithmetic lexer and parser') | |
parser.add_argument('-a', dest='analyze', action='store_true', | |
help='lexical analysis only') | |
parser.add_argument('input', type=str, help='string to parse') | |
args = parser.parse_args() | |
l = Lexer() | |
if args.analyze: | |
for t in l(args.input): | |
print(t) | |
else: | |
p = Parser() | |
print(p([t for t in l(args.input)])) | |
# Local Variables: | |
# compile-command: "python2 -m doctest -v lexarith.py" | |
# End: |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment