Last active
April 5, 2019 21:34
-
-
Save nischalshrestha/0a1b490ab0edd489ff898e1616a30c8d to your computer and use it in GitHub Desktop.
Lark example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Dependencies:
# - Python 3.6+
# - Lark: pip install lark-parser
import sys | |
import os.path | |
from lark import Lark, Transformer, Visitor | |
from lark import Tree | |
# Lark grammar for a small subset of pandas selection syntax on a frame
# named `df`: row indexing (df[0], df[0:1]), column lists (df[['a', 'b']])
# and positional indexing (df.iloc[rows, cols]).
#
# Terminals are imported from Lark's bundled common grammar:
# https://github.com/lark-parser/lark/blob/master/lark/grammars/common.lark
#
# Rules whose names start with "_" (_index, _rows_cols) are inlined by Lark,
# so their children appear directly under the parent rule in the parse tree.
#
# Each slice bound is optional but is at most ONE integer, hence `NUMBER?`.
# With `NUMBER*`, and whitespace ignored, input such as "df[1 2:3]" was
# accepted and the two-integer bound was later treated as if it were absent.
pandas_grammar = """
start: subset+ -> exprs
data: "df" -> df
subset: data (rows | cols | iloc)
rows: "[" _index "]"
cols: "[[" label ("," label)* "]]"
iloc: "." "iloc" (rows | _rows_cols)
_rows_cols: "[" left ("," right?)? "]"
left: _index
right: _index
_index: range | NUMBER
range: start_idx ":" end_idx
start_idx: NUMBER?
end_idx: NUMBER?
label: "'" WORD "'"
%import common.LETTER
%import common.INT -> NUMBER
%import common.WORD
%import common.WS
%ignore WS
"""
# Sample inputs: several whitespace-separated expressions per selection kind.
rows = """
df[0] df[0:1] df[10:100] df[:1] df[1:]
"""
cols = """
df[['a']] df[['aaa']] df[['a', 'b', 'c']] df[['aaa', 'bbb', 'ccc']]
"""
iloc_rows = """
df.iloc[1] df.iloc[1, ] df.iloc[:] df.iloc[0:] df.iloc[:1]
df.iloc[0:1] df.iloc[0:1, 0:1] df.iloc[0:, 0:] df.iloc[:1, :1]
"""

# `parser` drops anonymous tokens from the tree; `parser_ast` uses Lark's
# default options.
parser = Lark(pandas_grammar, keep_all_tokens=False)
parser_ast = Lark(pandas_grammar)

# Print a heading followed by the pretty-printed parse tree for each sample.
for _heading, _sample in (
    ('\nrows', rows),
    ('\ncols', cols),
    ('\niloc', iloc_rows),
):
    print(_heading)
    print(parser.parse(_sample).pretty())
class LeftRightVisitor(Visitor):
    """Capture the index found under the `left` and `right` nodes of a tree.

    After `visit()` has run, `left_tree` / `right_tree` hold the first child
    of the most recently visited `left` / `right` node, or None when no such
    node was visited.
    """

    # Class-level defaults so both attributes exist before any visit.
    left_tree = None
    right_tree = None

    def _remember(self, slot, tree):
        """Store the node's first child in attribute `slot`, then echo the node."""
        setattr(self, slot, tree.children[0])
        print(tree.data, tree)

    def left(self, tree):
        """Callback for `left` nodes."""
        self._remember('left_tree', tree)

    def right(self, tree):
        """Callback for `right` nodes."""
        self._remember('right_tree', tree)
# Demo input: an iloc selection with an open-ended row range and a trailing
# comma but no column index, so the tree has a `left` node and no `right`.
text = "df.iloc[:,]"
visitor = LeftRightVisitor()

parse_tree = parser.parse(text)
print(parse_tree.pretty())

# Walk the tree so the visitor records the left/right index sub-trees.
visitor.visit(parse_tree)
print('-----')
class IndexTranslator(Transformer):
    """Rewrite the bounds of a parsed `range` node as 1-based index strings.

    `start_idx` and `end_idx` nodes are replaced by strings; the enclosing
    `range` node has no callback here, so it is left as a tree holding those
    two strings.

    Args:
        side: 'left' for the row index or 'right' for the column index.
            Selects the default upper bound ('nrow(df)' or 'ncol(df)') used
            when the range has no explicit end.
        *args, **kwargs: forwarded unchanged to `Transformer.__init__`.
    """

    def __init__(self, side, *args, **kwargs):
        # Initialise the base class so any Transformer options passed by the
        # caller take effect instead of being silently dropped.
        super().__init__(*args, **kwargs)
        self.side = side

    def start_idx(self, matches):
        """Return the lower bound shifted from 0-based to 1-based.

        An absent lower bound becomes '1'.
        """
        if len(matches) == 1:
            return str(int(matches[0]) + 1)
        return '1'

    def end_idx(self, matches):
        """Return the upper bound, or the full extent for an open-ended range.

        An explicit bound is returned unchanged; presumably because an
        exclusive 0-based end equals the inclusive 1-based end.

        Raises:
            ValueError: the range is open-ended and `side` is neither 'left'
                nor 'right'. Lark may surface this wrapped in its own
                visitor error.
        """
        if len(matches) == 1:
            return matches[0]
        if self.side == 'left':
            return 'nrow(df)'
        if self.side == 'right':
            return 'ncol(df)'
        # Previously fell through and returned None, which only failed later
        # as an unrelated TypeError during string concatenation.
        raise ValueError(
            "side must be 'left' or 'right', got {!r}".format(self.side)
        )
def _translate_index(node, side):
    """Translate one captured iloc index into a 1-based index string.

    Args:
        node: what the visitor captured for this side: None when the side
            was absent, a `range` Tree, or a bare NUMBER token.
        side: 'left' (rows) or 'right' (columns); passed to IndexTranslator
            to pick the default upper bound of an open-ended range.

    Returns:
        '' for an absent index, 'start:end' for a range, or the single
        position shifted from 0-based to 1-based.
    """
    if node is None:
        return ''
    if isinstance(node, Tree):
        # The transformer replaces both bounds with strings and leaves the
        # `range` node itself in place, so read them back from its children.
        bounds = IndexTranslator(side).transform(node)
        return bounds.children[0] + ':' + bounds.children[1]
    # A bare NUMBER token (e.g. df.iloc[1, 2]) has no children to transform.
    return str(int(node) + 1)


# Build the translated `df[rows,cols]` expression from the captured indices.
new_idx = ''
new_left_idx = _translate_index(visitor.left_tree, 'left')
new_right_idx = _translate_index(visitor.right_tree, 'right')
print("df["+new_left_idx+","+new_right_idx+"]")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment