Skip to content

Instantly share code, notes, and snippets.

@orontee
Last active December 27, 2015 14:09
Show Gist options
  • Save orontee/7338913 to your computer and use it in GitHub Desktop.
Save orontee/7338913 to your computer and use it in GitHub Desktop.
Arithmetic expressions lexer and parser
import argparse
import operator
sym_tokens = {'unary_operators': (r'+', r'-'),
'binary_operators': (r'+', r'-', r'*', r'/'),
'grouping_open': (r'(',),
'grouping_close': (r')',)}
semantic = {'binary_operators': {r'+': operator.add,
r'-': operator.sub,
r'*': operator.mul,
r'/': operator.div},
'unary_operators': {r'-': operator.neg}}
digits = (r'0', r'1', r'2', r'3', r'4', r'5', r'6', r'7', r'8', r'9')
class LexerException(Exception):
"""Base class for all lexer exceptions.
"""
pass
class ParserException(Exception):
"""Base class for parser exceptions.
"""
pass
class Lexer(object):
"""Syntax analyser for arithmetic expressions.
The lexical analysis is governed by the global variables
``sym_tokens`` and ``digits``.
A simple use case:
>>> l = Lexer()
>>> [t for t in l("2+433-(5*453.34)")]
[2.0, '+', 433.0, '-', '(', 5.0, '*', 453.34, ')']
The LexerException is raised when the analysis fails:
>>> from lexarith import LexerException
>>> [t for t in l("5*pi/2")]
Traceback (most recent call last):
LexerException: Unexpected character: p
>>> l = Lexer(int)
>>> [t for t in l("5+3-12*2")]
[5, '+', 3, '-', 12, '*', 2]
"""
def __init__(self, ntype=float):
"""Initialize lexer.
"""
self.syms = [c for l in sym_tokens.values() for c in l]
self.ntype = ntype
def __call__(self, s):
"""Build a generator that performs a lexical analysis of the given
string.
"""
buff = None
for c in s:
if c in self.syms:
if buff is not None:
yield self.convert(buff)
buff = None
yield c
elif c in digits:
buff = (buff or '') + c
elif c in (r'.', r','):
if self.ntype != float:
raise LexerException('Unexpected character: {0}'.format(c))
if buff is not None:
buff += c
else:
raise LexerException('Unexpected character: {0}'.format(c))
if buff is not None:
yield self.convert(buff)
def convert(self, s):
"""Convert the ``s`` to the type ``self.ntype``.
"""
try:
n = self.ntype(s)
except ValueError:
raise LexerException('Float conversion failed: {0}'.format(s))
return n
class Parser(object):
"""Parse tokens as returned by the lexer.
>>> l = Lexer(int)
>>> p = Parser(int)
>>> p([t for t in l("5+3-(12*2)")])
-16
"""
class Node(object):
"""Arithmetic node.
"""
def __init__(self, owner=None, value=None, parent=None):
"""Initialize an arithmetic node.
"""
if owner is not None:
self._owner = owner
self._parent = parent
self._children = []
self._value = value
def __str__(self):
"""Convert to string.
"""
return 'Arithmetic Node: {0}'.format(self.value)
@property
def owner(self):
"""Get the owner.
"""
try:
return self._owner
except AttributeError:
return self.parent.owner if self.parent else None
@property
def ntype(self):
"""Get the number type.
The number type is read from the ``owner`` property.
"""
return self.owner.ntype if self.owner else None
def getparent(self):
return self._parent
def setparent(self, parent):
self._parent = parent
parent = property(getparent, setparent)
@property
def children(self):
"""Get the children.
"""
return self._children
def enlarge(self, value=None):
"""Append a new child.
"""
child = Parser.Node(parent=self, value=value)
self.children.append(child)
return child
def downgrade(self, value=None):
"""Return a new parent.
"""
node = Parser.Node(owner=self.owner, value=value,
parent=self.parent)
if self.parent is not None:
del self.parent.children[self.parent.children.index(self)]
node.children.append(self)
return node
def setvalue(self, value):
self._value = value
def getvalue(self):
return self._value
value = property(getvalue, setvalue)
def collapse(self):
"""Collapse the subtree rooted at this node.
Evaluate the expression represented by this node, update
``value`` property and remove the children.
"""
if len(self.children) == 0:
if isinstance(self.value, self.ntype):
pass
else:
raise ParserException('Unexpected value')
else:
if len(self.children) == 2:
if self.value in sym_tokens['binary_operators']:
op = semantic['binary_operators'][self.value]
elif len(self.children) == 1:
if self.value in sym_tokens['unary_operators']:
op = semantic['unary_operators'][self.value]
else:
raise ParserException('Unexpected number of operands')
args = [n.value for n in self.children]
self.value = op(*args)
self._children = []
def __init__(self, ntype=float):
self._ntype = ntype
@property
def ntype(self):
"""Get the number type.
"""
return self._ntype
def __call__(self, tokens):
"""Build and evaluate the graph representing the arithmetic expression
associated to ``tokens``.
The value of the graph is returned.
"""
root, last = None, None
for t in tokens:
if isinstance(t, self.ntype):
if last is None:
last = Parser.Node(owner=self, value=t)
root = last
else:
last = last.enlarge(t)
elif t in sym_tokens['binary_operators']:
if isinstance(last.value, self.ntype):
if t in (r'*', r'/'):
obsolete = (root is last)
last = last.downgrade(t)
if obsolete:
root = last
else:
if last.parent is not None:
obsolete = (root is last.parent)
last = last.parent.downgrade(t)
if obsolete:
root = last
else:
obsolete = (root is last)
last = last.downgrade(t)
if obsolete:
root = last
else:
raise ParserException('Unexpected operator')
# elif t in sym_tokens['grouping_open']:
# group.append(node)
# elif t in sym_tokens['grouping_close']:
# try:
# node = group.pop()
# except IndexError:
# raise ParserException('Unbalanced parenthesis')
# root.collapse()
return root.value
if __name__ == '__main__':
parser = argparse.ArgumentParser(
description='Arithmetic lexer and parser')
parser.add_argument('-a', dest='analyze', action='store_true',
help='lexical analysis only')
parser.add_argument('input', type=str, help='string to parse')
args = parser.parse_args()
l = Lexer()
if args.analyze:
for t in l(args.input):
print(t)
else:
p = Parser()
print(p([t for t in l(args.input)]))
# Local Variables:
# compile-command: "python2 -m doctest -v lexarith.py"
# End:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment