filipposc5 · February 3, 2017 12:32
diff --git a/refactored-pars-for-science.py b/refactored-pars-for-science.py
 # Refactored version, written purely for science.
 # Used to parse Python variable assignments. The grammar is a bit limited, but it worked for the sample Python files I was parsing.

 from pyparsing import *


 # --- Grammar terminals -------------------------------------------------
 # NAME: a run of alphanumeric characters and/or underscores.
 NAME = Word(alphanums + '_')
 # VALUE: a suppressed Empty() followed by whatever is left on the line.
 # NOTE(review): the Empty() is presumably there to skip leading
 # whitespace before restOfLine -- confirm.
 VALUE = Suppress(Empty()) + restOfLine
 # Alias (same parser object) used for the text inside braces/parentheses.
 INNER_VALUE = VALUE

 # Punctuation is matched but dropped from the parse results.
 EQUALS, LBRACE, RBRACE, LPAR, RPAR = (Suppress(ch) for ch in "={}()")
 # End of line, likewise kept out of the results.
 EOL = LineEnd().suppress()

 # Built-in sample input. It is replaced by the file contents read below.
 sample = """
 import os

 V1 = ( 1 )

 V2 = ( 1
 )

 V = 1
 B=3
 """

 ## Reading from https://github.com/ministryofjustice/vis/blob/develop/vis/settings/heroku.py etc
 # Each file is read in turn and rebinds `sample`, so only the contents of
 # the last file listed ("heroku.py") are left in `sample` afterwards.
 for settings_path in ("production.py", "heroku.py"):
    with open(settings_path, "r") as f:
        sample = f.read()

 # A non-blank line: starts at a line start, skips ahead to the line end
 # (the skip fails if it meets a blank line), then consumes the line end.
 line = LineStart() + SkipTo(LineEnd(), failOn=LineStart()+LineEnd()) + EOL
 lines = OneOrMore(line)

 # Right-hand sides wrapped in {...} or (...); the delimiters are suppressed.
 right_expr1 = LBRACE + INNER_VALUE + RBRACE
 right_expr2 = LPAR + INNER_VALUE + RPAR

 # `|` tries the alternatives left to right and keeps the first that matches.
 expr = right_expr1 | right_expr2 | VALUE
 # NAME = <expr>; the "=" itself is suppressed.
 equal_expr = NAME + EQUALS + expr

 # Any other statement: a leading word followed by everything up to the line end.
 stmt = Word(alphanums+'_') + SkipTo(LineEnd())

 # Whole input: only the grouped assignments are kept in the results; runs
 # of `lines` and other statements are matched but suppressed.
 vae = OneOrMore(Suppress(Group(lines)) + EOL |
                 Group(equal_expr) + EOL |
                 Suppress(Group(stmt)) + EOL
                 )

 # Single-argument print(...) prints the same text under Python 2's print
 # statement and is also valid as Python 3's print function.
 print(vae.parseString(sample))

diff --git a/zz-first-draft-slightly-sloppier.py b/zz-first-draft-slightly-sloppier.py
 # This was my very first attempt; it is a bit sloppier, but it got the job done.
 # Its main weakness is that it parses line by line, and there are of course
 # much easier ways to do that.

 from pyparsing import *


 # A comment: a literal "#" followed by the rest of the line.
 bnfComment = Literal("#") + restOfLine

 def readFile(filename):
    """Return the full contents of *filename* (opened in text mode 'r')."""
    with open(filename, 'r') as source:
        contents = source.read()
    return contents

 def getVars(filename):
    """Collect the ``NAME = value`` pairs found in *filename*.

    The file is read with readFile() and every physical line is parsed on
    its own, so values that span several lines are not reassembled. Lines
    that do not parse as an assignment are skipped.

    Args:
        filename: path of the file to scan.

    Returns:
        dict mapping each name (text before the first ``=``, with
        surrounding whitespace stripped) to the rest of that line as
        captured by ``restOfLine``.
    """
    EQUALS = Literal("=").suppress()
    # Everything up to the first "=" (or the end of the line).
    VAR = Regex(r"[^=\n]+")
    EMPTY_LINE = LineStart() + LineEnd()

    assignment = Group(VAR + EQUALS + restOfLine)
    parser = Dict(OneOrMore(assignment))
    parser.ignore(bnfComment)
    parser.ignore(EMPTY_LINE)

    successes = {}
    for text in readFile(filename).splitlines():
        try:
            parsed = dict(parser.parseString(text))
        except ParseBaseException:
            # Not an assignment (blank line, comment, import, ...): skip it.
            # Only parse errors are ignored; anything else propagates.
            continue
        # VAR runs right up to the "=", so "FOO = 1" would give the key
        # "FOO ". Strip it so "FOO=1" and "FOO = 1" name the same variable
        # when the key sets of different files are compared.
        successes.update((name.strip(), value) for name, value in parsed.items())
    return successes

    #for i in d.scanString(FF):
    #    print i.keys()
    #pdb.set_trace()
    #print d1.next()[0].keys()

 # Collect the variable names assigned in each settings file.
 prod = getVars("production.py")
 heroku = getVars("heroku.py")
 base = getVars("base.py")
 # Single-argument print(...) prints the same text under Python 2's print
 # statement and is also valid as Python 3's print function.
 print('exists in heroku but not in prod')
 print(set(heroku) - set(prod))
 print('exists in heroku and not inherited by base and doesnt exist in prod')
 print(set(heroku) - set(base) - set(prod))

 # After heavy edits in production.py the final result was : 
 #
 # exists in heroku but not in prod
 # set(['LOGGING ', 'S3_BUCKET_NAME ', 'CACHES ', 'DJ_DATABASE_URL ', 'STATICFILES_STORAGE ', 'S3_SECRET_ACCESS_KEY_ID ', 'DATABASES ', 'INSTALLED_APPS ', 'S3_ACCESS_KEY_ID ', 'redis_url '])
 # exists in heroku and not inherited by base and doesnt exist in prod
 # set(['LOGGING ', 'S3_SECRET_ACCESS_KEY_ID ', 'STATICFILES_STORAGE ', 'S3_ACCESS_KEY_ID ', 'S3_BUCKET_NAME '])
	# Refactored version, written purely for science.
	# Used to parse Python variable assignments. The grammar is a bit limited, but it worked for the sample Python files I was parsing.

	from pyparsing import *


	# --- Grammar terminals -------------------------------------------------
	# NAME: a run of alphanumeric characters and/or underscores.
	NAME = Word(alphanums + '_')
	# VALUE: a suppressed Empty() followed by whatever is left on the line.
	# NOTE(review): the Empty() is presumably there to skip leading
	# whitespace before restOfLine -- confirm.
	VALUE = Suppress(Empty()) + restOfLine
	# Alias (same parser object) used for the text inside braces/parentheses.
	INNER_VALUE = VALUE

	# Punctuation is matched but dropped from the parse results.
	EQUALS, LBRACE, RBRACE, LPAR, RPAR = (Suppress(ch) for ch in "={}()")
	# End of line, likewise kept out of the results.
	EOL = LineEnd().suppress()

	# Built-in sample input. It is replaced by the file contents read below.
	sample = """
	import os

	V1 = ( 1 )

	V2 = ( 1
	)

	V = 1
	B=3
	"""

	## Reading from https://github.com/ministryofjustice/vis/blob/develop/vis/settings/heroku.py etc
	# Each read rebinds `sample`, so only the contents of "heroku.py" are
	# left in `sample` afterwards. The bodies of the `with` statements must
	# be indented for this to be valid Python.
	with open("production.py", "r") as f:
	    sample = f.read()

	with open("heroku.py", "r") as f:
	    sample = f.read()

	# A non-blank line: starts at a line start, skips ahead to the line end
	# (the skip fails if it meets a blank line), then consumes the line end.
	line = LineStart() + SkipTo(LineEnd(), failOn=LineStart()+LineEnd()) + EOL
	lines = OneOrMore(line)

	# Right-hand sides wrapped in {...} or (...); the delimiters are suppressed.
	right_expr1 = LBRACE + INNER_VALUE + RBRACE
	right_expr2 = LPAR + INNER_VALUE + RPAR

	# `|` tries the alternatives left to right and keeps the first that matches.
	expr = right_expr1 | right_expr2 | VALUE
	# NAME = <expr>; the "=" itself is suppressed.
	equal_expr = NAME + EQUALS + expr

	# Any other statement: a leading word followed by everything up to the line end.
	stmt = Word(alphanums+'_') + SkipTo(LineEnd())

	# Whole input: only the grouped assignments are kept in the results; runs
	# of `lines` and other statements are matched but suppressed.
	vae = OneOrMore(Suppress(Group(lines)) + EOL |
	                Group(equal_expr) + EOL |
	                Suppress(Group(stmt)) + EOL
	                )

	# Single-argument print(...) prints the same text under Python 2's print
	# statement and is also valid as Python 3's print function.
	print(vae.parseString(sample))
	# This was my very first attempt; it is a bit sloppier, but it got the job done.
	# Its main weakness is that it parses line by line, and there are of course
	# much easier ways to do that.

	from pyparsing import *


	# A comment: a literal "#" followed by the rest of the line.
	bnfComment = Literal("#") + restOfLine

	def readFile(filename):
	    """Return the full contents of *filename* (opened in text mode 'r')."""
	    # The function body and the `with` body must be indented; without
	    # the indentation this definition is a syntax error.
	    with open(filename, 'r') as f:
	        return f.read()

	def getVars(filename):
	    """Collect the ``NAME = value`` pairs found in *filename*.

	    The file is read with readFile() and every physical line is parsed on
	    its own, so values that span several lines are not reassembled. Lines
	    that do not parse as an assignment are skipped.

	    Args:
	        filename: path of the file to scan.

	    Returns:
	        dict mapping each name (text before the first ``=``, with
	        surrounding whitespace stripped) to the rest of that line as
	        captured by ``restOfLine``.
	    """
	    EQUALS = Literal("=").suppress()
	    # Everything up to the first "=" (or the end of the line).
	    VAR = Regex(r"[^=\n]+")
	    EMPTY_LINE = LineStart() + LineEnd()

	    assignment = Group(VAR + EQUALS + restOfLine)
	    parser = Dict(OneOrMore(assignment))
	    parser.ignore(bnfComment)
	    parser.ignore(EMPTY_LINE)

	    successes = {}
	    for text in readFile(filename).splitlines():
	        try:
	            parsed = dict(parser.parseString(text))
	        except ParseBaseException:
	            # Not an assignment (blank line, comment, import, ...): skip it.
	            # Only parse errors are ignored; anything else propagates.
	            continue
	        # VAR runs right up to the "=", so "FOO = 1" would give the key
	        # "FOO ". Strip it so "FOO=1" and "FOO = 1" name the same variable
	        # when the key sets of different files are compared.
	        successes.update((name.strip(), value) for name, value in parsed.items())
	    return successes

	#for i in d.scanString(FF):
	# print i.keys()
	#pdb.set_trace()
	#print d1.next()[0].keys()

	# Collect the variable names assigned in each settings file.
	prod = getVars("production.py")
	heroku = getVars("heroku.py")
	base = getVars("base.py")
	# Single-argument print(...) prints the same text under Python 2's print
	# statement and is also valid as Python 3's print function.
	print('exists in heroku but not in prod')
	print(set(heroku) - set(prod))
	print('exists in heroku and not inherited by base and doesnt exist in prod')
	print(set(heroku) - set(base) - set(prod))

	# After heavy edits in production.py the final result was :
	#
	# exists in heroku but not in prod
	# set(['LOGGING ', 'S3_BUCKET_NAME ', 'CACHES ', 'DJ_DATABASE_URL ', 'STATICFILES_STORAGE ', 'S3_SECRET_ACCESS_KEY_ID ', 'DATABASES ', 'INSTALLED_APPS ', 'S3_ACCESS_KEY_ID ', 'redis_url '])
	# exists in heroku and not inherited by base and doesnt exist in prod
	# set(['LOGGING ', 'S3_SECRET_ACCESS_KEY_ID ', 'STATICFILES_STORAGE ', 'S3_ACCESS_KEY_ID ', 'S3_BUCKET_NAME '])