Skip to content

Instantly share code, notes, and snippets.

@filipposc5
Last active February 3, 2017 12:32
Show Gist options
  • Select an option

  • Save filipposc5/7f7d27a940b2a2abea66d613d63e403f to your computer and use it in GitHub Desktop.

Select an option

Save filipposc5/7f7d27a940b2a2abea66d613d63e403f to your computer and use it in GitHub Desktop.
some BNF/grammar/parsing fun
# Refactored version, written purely for science.
# Used to parse Python variable assignments. The grammar is a bit limited, but it worked for the sample Python files I was parsing.
from pyparsing import *
# --- Token definitions ------------------------------------------------------
NAME = Word(alphanums + '_')
# An Empty() match is suppressed ahead of restOfLine; presumably this lets
# pyparsing skip leading whitespace before the rest of the line is captured.
VALUE = Suppress(Empty()) + restOfLine
EQUALS, LBRACE, RBRACE, LPAR, RPAR = map(Suppress, "={}()")
EOL = LineEnd().suppress()
INNER_VALUE = VALUE

# Inline example of the assignment shapes the grammar was written for.
# NOTE: it is replaced by the file contents read just below.
sample = """
import os
V1 = ( 1 )
V2 = ( 1
)
V = 1
B=3
"""

## Reading from https://github.com/ministryofjustice/vis/blob/develop/vis/settings/heroku.py etc
# Only one file is parsed per run; change the filename (e.g. to
# "production.py") to parse a different file.
with open("heroku.py", "r") as f:
    sample = f.read()

# --- Grammar ----------------------------------------------------------------
# line starts, anything follows until EOL, fails on blank lines
line = LineStart() + SkipTo(LineEnd(), failOn=LineStart() + LineEnd()) + EOL
lines = OneOrMore(line)

# Right-hand sides: a value wrapped in {...}, wrapped in (...), or bare.
right_expr1 = LBRACE + INNER_VALUE + RBRACE
right_expr2 = LPAR + INNER_VALUE + RPAR
expr = right_expr1 | right_expr2 | VALUE
equal_expr = NAME + EQUALS + expr

# Any other statement: a leading word followed by everything up to end of line.
stmt = Word(alphanums + '_') + SkipTo(LineEnd())

# Only the assignment alternative is kept (as a Group) in the results; the
# other two alternatives are wrapped in Suppress.
vae = OneOrMore(
    Suppress(Group(lines)) + EOL
    | Group(equal_expr) + EOL
    | Suppress(Group(stmt)) + EOL
)

# print(...) with a single argument behaves the same on Python 2 and 3.
print(vae.parseString(sample))
# This was my very first attempt; it is a bit sloppier, but it got the job done.
# Its main weakness is that it parses line by line, and of course there
# are much easier ways to do that.
from pyparsing import *
# Grammar for a '#' comment: the literal "#" followed by the rest of the line.
# getVars() registers it with ignore() on its parser.
bnfComment = "#" + restOfLine
def readFile(filename):
    """Return the entire content of *filename* as a single string.

    The file is opened in text read mode (``'r'``) and is closed when the
    ``with`` block exits.
    """
    with open(filename, 'r') as handle:
        return handle.read()
def getVars(filename):
    """Collect ``name = value`` pairs from *filename*, parsing line by line.

    Every line of the file is parsed on its own against the grammar
    ``VAR "=" restOfLine``. Lines that the grammar rejects are skipped.

    Keys are whatever text precedes the first ``=`` on the line, so they keep
    any whitespace that sits in front of the ``=`` (e.g. ``'LOGGING '``).
    When the same key appears on several lines, the last one wins.

    Returns:
        dict built from the parsed lines, keyed by the text left of ``=``.
    """
    EQUALS = Literal("=").suppress()
    VAR = Regex(r"[^=\n]+")
    EMPTY_LINE = LineStart() + LineEnd()

    assignment = Group(VAR + EQUALS + restOfLine)
    grammar = Dict(OneOrMore(assignment))
    grammar.ignore(bnfComment)
    grammar.ignore(EMPTY_LINE)

    successes = {}
    for text in readFile(filename).splitlines():
        try:
            parsed = grammar.parseString(text)
        except ParseException:
            # Not an assignment line (blank, comment, continuation, ...).
            # Catch only the parser's own error so that real failures and
            # KeyboardInterrupt are no longer silently swallowed.
            continue
        successes.update(dict(parsed))
    return successes
#for i in d.scanString(FF):
# print i.keys()
#pdb.set_trace()
#print d1.next()[0].keys()
# Collect the assignment names found in each file.
prod = getVars("production.py")
heroku = getVars("heroku.py")
base = getVars("base.py")

# Iterating a dict yields its keys, so set(d) is the set of variable names.
# print(...) with a single argument behaves the same on Python 2 and 3.
print('exists in heroku but not in prod')
print(set(heroku) - set(prod))
print('exists in heroku and not inherited by base and doesnt exist in prod')
print(set(heroku) - set(base) - set(prod))
# After heavy edits in production.py the final result was :
#
# exists in heroku but not in prod
# set(['LOGGING ', 'S3_BUCKET_NAME ', 'CACHES ', 'DJ_DATABASE_URL ', 'STATICFILES_STORAGE ', 'S3_SECRET_ACCESS_KEY_ID ', 'DATABASES ', 'INSTALLED_APPS ', 'S3_ACCESS_KEY_ID ', 'redis_url '])
# exists in heroku and not inherited by base and doesnt exist in prod
# set(['LOGGING ', 'S3_SECRET_ACCESS_KEY_ID ', 'STATICFILES_STORAGE ', 'S3_ACCESS_KEY_ID ', 'S3_BUCKET_NAME '])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment