Skip to content

Instantly share code, notes, and snippets.

@nischalshrestha
Last active April 5, 2019 21:34
Show Gist options
  • Save nischalshrestha/0a1b490ab0edd489ff898e1616a30c8d to your computer and use it in GitHub Desktop.
Save nischalshrestha/0a1b490ab0edd489ff898e1616a30c8d to your computer and use it in GitHub Desktop.
Lark example
# dependencies:
# - Python3.6
# - Lark: pip install lark-parser
import sys
import os.path
from lark import Lark, Transformer, Visitor
from lark import Tree
# https://github.com/lark-parser/lark/blob/master/lark/grammars/common.lark
# Lark grammar for a small subset of pandas DataFrame indexing on a frame
# that is literally named `df`:
#   df[<index>]            -> rule `rows`
#   df[['a', 'b', ...]]    -> rule `cols`
#   df.iloc[<index>]       -> rule `iloc` (rows only)
#   df.iloc[<left>, <right>] -> rule `iloc` (rows and, optionally, columns)
# An <index> is either a single integer or a `start:end` range whose bounds
# may each be omitted.  Rules whose names begin with `_` (`_index`,
# `_rows_cols`) are inlined by Lark, so they never show up as tree nodes.
# Each range bound is declared as `NUMBER?` (zero or one integer); declaring
# it as `NUMBER*` would also accept inputs such as `df[1 2:3]`.
pandas_grammar = """
start: subset+ -> exprs
data: "df" -> df
subset: data (rows | cols | iloc)
rows: "[" _index "]"
cols: "[[" label ("," label)* "]]"
iloc: "." "iloc" (rows | _rows_cols)
_rows_cols: "[" left ("," right?)? "]"
left: _index
right: _index
_index: range | NUMBER
range: start_idx ":" end_idx
start_idx: NUMBER?
end_idx: NUMBER?
label: "'" WORD "'"
%import common.LETTER
%import common.INT -> NUMBER
%import common.WORD
%import common.WS
%ignore WS
"""
# Sample inputs for the grammar.  Each string holds several whitespace
# separated expressions and is parsed in one go by the code further down.

# Plain bracket indexing with a single position or a slice.
rows = (
    "\n"
    "df[0] df[0:1] df[10:100] df[:1] df[1:]\n"
)

# Double-bracket selection by one or more quoted labels.
cols = (
    "\n"
    "df[['a']] df[['aaa']] df[['a', 'b', 'c']] df[['aaa', 'bbb', 'ccc']]\n"
)

# `.iloc` indexing: rows only, and rows plus columns.
iloc_rows = (
    "\n"
    "df.iloc[1] df.iloc[1, ] df.iloc[:] df.iloc[0:] df.iloc[:1]\n"
    "df.iloc[0:1] df.iloc[0:1, 0:1] df.iloc[0:, 0:] df.iloc[:1, :1]\n"
)
# Two parsers over the same grammar: `parser` is the one used throughout this
# script; `parser_ast` is built with Lark's default options and is not used
# again in this file.
parser = Lark(pandas_grammar, keep_all_tokens=False)
parser_ast = Lark(pandas_grammar)

# Parse each batch of sample expressions and dump the resulting tree under
# its heading.
for _heading, _sample in (('\nrows', rows), ('\ncols', cols), ('\niloc', iloc_rows)):
    print(_heading)
    print(parser.parse(_sample).pretty())
class LeftRightVisitor(Visitor):
    """Record the index node found under the `left` and `right` rules.

    After `visit()` has walked a parse tree, `left_tree` / `right_tree` hold
    the first child of the most recently visited `left` / `right` node, or
    stay `None` when the tree contains no such node.  Every visited node is
    also printed.
    """

    left_tree = None
    right_tree = None

    def _capture(self, attr, tree):
        # Store the node's first child on this instance, then echo the node.
        setattr(self, attr, tree.children[0])
        print(tree.data, tree)

    def left(self, tree):
        """Callback for `left` nodes (the first slot of `.iloc[...]`)."""
        self._capture('left_tree', tree)

    def right(self, tree):
        """Callback for `right` nodes (the slot after the comma)."""
        self._capture('right_tree', tree)
# Demo input: an `.iloc` expression with a fully open slice before the comma
# and nothing after it.
text = "df.iloc[:,]"
parse_tree = parser.parse(text)
print(parse_tree.pretty())
# Walk the tree so `visitor` records the `left` / `right` index nodes; the
# translation code further down reads them from `visitor`.
visitor = LeftRightVisitor()
visitor.visit(parse_tree)
# Separator between the tree dump and the translated output.
print('-----')
class IndexTranslator(Transformer):
    """Rewrite the bounds of a parsed pandas slice as R index strings.

    pandas slices are 0-based with an exclusive end, while R ranges are
    1-based with an inclusive end.  The start bound is therefore shifted up
    by one and the end bound is passed through unchanged.  Omitted bounds are
    replaced by the R equivalents of "from the first" / "to the last".

    Args:
        side: Which slot of the index the slice sits in.  ``'left'`` means
            rows (an open end becomes ``nrow(df)``), ``'right'`` means
            columns (an open end becomes ``ncol(df)``).
        *args, **kwargs: Forwarded to ``Transformer.__init__``.
    """

    def __init__(self, side, *args, **kwargs):
        # Forward the remaining options to the base class instead of silently
        # dropping them, so Transformer settings passed by a caller take effect.
        super().__init__(*args, **kwargs)
        self.side = side

    def start_idx(self, matches):
        """Return the 1-based start bound, or ``'1'`` when none is given."""
        return str(int(matches[0]) + 1) if len(matches) == 1 else '1'

    def end_idx(self, matches):
        """Return the end bound, or the row/column count for an open end.

        Raises:
            ValueError: If the end is open and ``side`` is neither
                ``'left'`` nor ``'right'``.
        """
        if len(matches) == 1:
            # Exclusive 0-based end == inclusive 1-based end: no adjustment.
            return matches[0]
        if self.side == 'left':
            return 'nrow(df)'
        if self.side == 'right':
            return 'ncol(df)'
        # Fail loudly instead of returning None, which would only surface
        # later as an unrelated TypeError during string concatenation.
        raise ValueError(
            "side must be 'left' or 'right', got {!r}".format(self.side)
        )
# Translate the index nodes captured by `visitor` into an R-style
# `df[rows,cols]` expression and print it.
# TODO extend this to the regular (non-iloc) subsets as well
def _to_r_index(side, node):
    """Return the R index text for one captured `left` / `right` node.

    Args:
        side: ``'left'`` for the row slot, ``'right'`` for the column slot.
        node: Either a `range` tree or a bare NUMBER token, as stored by
            `LeftRightVisitor`.

    Returns:
        ``'start:end'`` for a range, or the 1-based position for a single
        number.
    """
    if not isinstance(node, Tree):
        # A bare NUMBER token (e.g. `df.iloc[1]`) has no children to
        # transform; shift the 0-based position to R's 1-based position.
        return str(int(node) + 1)
    bounds = IndexTranslator(side).transform(node).children
    return bounds[0] + ':' + bounds[1]


# Placeholder kept from the original script; nothing reads it yet.
new_idx = ''
# A slot that was not present in the input stays empty in the output.
new_left_idx = (
    '' if visitor.left_tree is None else _to_r_index('left', visitor.left_tree)
)
new_right_idx = (
    '' if visitor.right_tree is None else _to_r_index('right', visitor.right_tree)
)
print("df[" + new_left_idx + "," + new_right_idx + "]")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment