bencharb · February 24, 2016 18:51
diff --git a/find_jinja_variables.py b/find_jinja_variables.py
 import string
 import re
 import jinja2

 def split_many(astring, *split_on_chars, **kwargs):
    """
    Split a string on any of several separators.

    Every separator except the last is first replaced by the last one, then
    the string is split once on that last separator.

    :param astring: string to split
    :type astring: basestring
    :param split_on_chars: separators to split on; a single space is used when none are given
    :type split_on_chars: tuple
    :param kwargs: keep_empty: keeps empty strings in result, which happens with two adjacent separators in a string (default False); any other keyword is accepted and ignored
    :type kwargs: dict
    :return: the pieces of astring, in their original order
    :rtype: list
    """
    keep_empty = bool(kwargs.get('keep_empty'))
    separators = split_on_chars or (' ',)
    lastchar = separators[-1]
    # Funnel every other separator into the last one so a single split() suffices.
    for sep in separators[:-1]:
        astring = astring.replace(sep, lastchar)
    result = astring.split(lastchar)
    if not keep_empty:
        # Compare by value; an identity test against a literal is implementation-dependent.
        result = [piece for piece in result if piece != '']
    return result

 def test_split_many():
    """
    Self-check for split_many; prints the outcome and re-raises on failure.

    Covers splitting on several separators, calling with no separators, and
    the keep_empty option.
    """
    thestr = 'a_string with.many:_separators. '
    split_chars = [' ', ':', ',', '_', '.']

    try:
        expected = ['a', 'string', 'with', 'many', 'separators']
        assert expected == split_many(thestr, *split_chars)

        # No separators passed at all.
        expected = ['a_string', 'with.many:_separators.']
        assert expected == split_many(thestr)

        # Adjacent separators show up as empty strings when keep_empty is set.
        expected = ['a', 'string', 'with', 'many', '', 'separators', '', '']
        assert expected == split_many(thestr, *split_chars, keep_empty=True)
    except AssertionError:
        # Parenthesised single-argument form is valid on both Python 2 and Python 3.
        print('test failed')
        raise
    else:
        print('test passed')
 # test_split_many()

 # Punctuation characters that are not treated as separators.
 acceptable_punc = set('#$._-')
 # Every character listed in string.punctuation.
 all_punctuation = set(string.punctuation)
 # Punctuation used as separators: everything not explicitly accepted.
 ignore_punc = all_punctuation - acceptable_punc

 def get_jinja_reserved_words():
    """
    Collect the words that should not be reported as template variables.

    The result combines the names registered in jinja2.filters.FILTERS and
    jinja2.tests.TESTS with hard-coded lists of logic words, operators,
    function names and statement keywords.

    :return: all reserved words
    :rtype: set
    """
    keyword_groups = (
        'and or not',  # logic
        'in is',  # operators
        'range lipsum dict cycler',  # funcs
        'if then else elif endif with while not for each',  # statement keywords
    )
    # Union the groups explicitly instead of scanning locals(), so the result
    # does not depend on which local names happen to exist.
    reserved = set(jinja2.filters.FILTERS)
    reserved.update(jinja2.tests.TESTS)
    for group in keyword_groups:
        reserved.update(group.split())
    return reserved

 # Reserved-word set, computed once when the module is loaded.
 JINJA_WORDS = get_jinja_reserved_words()

 def is_sequence_of_punctuation_chars(s):
    """
    Tell whether a string consists only of punctuation characters.

    Whitespace between words is ignored.

    :param s: string to inspect
    :type s: basestring
    :return: True when s has at least one word and every word is made up solely of punctuation; False otherwise (including for empty or whitespace-only input)
    :rtype: bool
    """
    words = s.split()
    # Check every word (not just the first) and return an explicit False
    # when there are no words at all.
    return bool(words) and all(set(word).issubset(all_punctuation) for word in words)




 def find_jinja_vars_in_string(astring):
    """
    Extract the candidate variable names from a Jinja template string.

    Digits and every character in ignore_punc are treated as separators;
    of the remaining whitespace-separated words, those made up purely of
    punctuation and those listed in JINJA_WORDS are discarded.

    :param astring: template text to scan
    :type astring: basestring
    :return: the words that are neither reserved nor pure punctuation
    :rtype: set
    """
    # NOTE(review): this blanks out every digit, not just "[0]"-style subscripts,
    # so a name such as "item1" is reported as "item" -- confirm that is intended.
    without_digits = re.sub(r'[\[\]0-9*]', ' ', astring)
    # ignore_punc already contains the brackets and both quote characters, so
    # one splitting pass covers all the separators.
    words = ' '.join(split_many(without_digits, *ignore_punc, keep_empty=False)).split()
    found = set()
    for word in words:
        if is_sequence_of_punctuation_chars(word):
            continue
        if word in JINJA_WORDS:
            continue
        found.add(word)
    return found

 def test_find_jinja_vars_in_string():
    """Check find_jinja_vars_in_string on a template mixing variables, filters, a quoted literal and stray punctuation."""
    template = "{{[application.name, activity.folder, gcs.name]|reject('undefined')|join(URLSEP) ---  === .. .}} -- ..."
    wanted = {'URLSEP', 'activity.folder', 'application.name', 'gcs.name'}
    assert wanted == find_jinja_vars_in_string(template)

 # NOTE(review): module-level call -- the self-test runs whenever this module is loaded.
 test_find_jinja_vars_in_string()
	import string
	import re
	import jinja2

	def split_many(astring, *split_on_chars, **kwargs):
	    """
	    Split a string on any of several separators.

	    Every separator except the last is first replaced by the last one, then
	    the string is split once on that last separator.

	    :param astring: string to split
	    :type astring: basestring
	    :param split_on_chars: separators to split on; a single space is used when none are given
	    :type split_on_chars: tuple
	    :param kwargs: keep_empty: keeps empty strings in result, which happens with two adjacent separators in a string (default False); any other keyword is accepted and ignored
	    :type kwargs: dict
	    :return: the pieces of astring, in their original order
	    :rtype: list
	    """
	    keep_empty = bool(kwargs.get('keep_empty'))
	    separators = split_on_chars or (' ',)
	    lastchar = separators[-1]
	    # Funnel every other separator into the last one so a single split() suffices.
	    for sep in separators[:-1]:
	        astring = astring.replace(sep, lastchar)
	    result = astring.split(lastchar)
	    if not keep_empty:
	        # Compare by value; an identity test against a literal is implementation-dependent.
	        result = [piece for piece in result if piece != '']
	    return result

	def test_split_many():
	    """
	    Self-check for split_many; prints the outcome and re-raises on failure.

	    Covers splitting on several separators, calling with no separators, and
	    the keep_empty option.
	    """
	    thestr = 'a_string with.many:_separators. '
	    split_chars = [' ', ':', ',', '_', '.']

	    try:
	        expected = ['a', 'string', 'with', 'many', 'separators']
	        assert expected == split_many(thestr, *split_chars)

	        # No separators passed at all.
	        expected = ['a_string', 'with.many:_separators.']
	        assert expected == split_many(thestr)

	        # Adjacent separators show up as empty strings when keep_empty is set.
	        expected = ['a', 'string', 'with', 'many', '', 'separators', '', '']
	        assert expected == split_many(thestr, *split_chars, keep_empty=True)
	    except AssertionError:
	        # Parenthesised single-argument form is valid on both Python 2 and Python 3.
	        print('test failed')
	        raise
	    else:
	        print('test passed')
	# test_split_many()

	# Punctuation characters that are not treated as separators.
	acceptable_punc = set('#$._-')
	# Every character listed in string.punctuation.
	all_punctuation = set(string.punctuation)
	# Punctuation used as separators: everything not explicitly accepted.
	ignore_punc = all_punctuation - acceptable_punc

	def get_jinja_reserved_words():
	    """
	    Collect the words that should not be reported as template variables.

	    The result combines the names registered in jinja2.filters.FILTERS and
	    jinja2.tests.TESTS with hard-coded lists of logic words, operators,
	    function names and statement keywords.

	    :return: all reserved words
	    :rtype: set
	    """
	    keyword_groups = (
	        'and or not',  # logic
	        'in is',  # operators
	        'range lipsum dict cycler',  # funcs
	        'if then else elif endif with while not for each',  # statement keywords
	    )
	    # Union the groups explicitly instead of scanning locals(), so the result
	    # does not depend on which local names happen to exist.
	    reserved = set(jinja2.filters.FILTERS)
	    reserved.update(jinja2.tests.TESTS)
	    for group in keyword_groups:
	        reserved.update(group.split())
	    return reserved

	# Reserved-word set, computed once when the module is loaded.
	JINJA_WORDS = get_jinja_reserved_words()

	def is_sequence_of_punctuation_chars(s):
	    """
	    Tell whether a string consists only of punctuation characters.

	    Whitespace between words is ignored.

	    :param s: string to inspect
	    :type s: basestring
	    :return: True when s has at least one word and every word is made up solely of punctuation; False otherwise (including for empty or whitespace-only input)
	    :rtype: bool
	    """
	    words = s.split()
	    # Check every word (not just the first) and return an explicit False
	    # when there are no words at all.
	    return bool(words) and all(set(word).issubset(all_punctuation) for word in words)




	def find_jinja_vars_in_string(astring):
	    """
	    Extract the candidate variable names from a Jinja template string.

	    Digits and every character in ignore_punc are treated as separators;
	    of the remaining whitespace-separated words, those made up purely of
	    punctuation and those listed in JINJA_WORDS are discarded.

	    :param astring: template text to scan
	    :type astring: basestring
	    :return: the words that are neither reserved nor pure punctuation
	    :rtype: set
	    """
	    # NOTE(review): this blanks out every digit, not just "[0]"-style subscripts,
	    # so a name such as "item1" is reported as "item" -- confirm that is intended.
	    without_digits = re.sub(r'[\[\]0-9]', ' ', astring)
	    # ignore_punc already contains the brackets and both quote characters, so
	    # one splitting pass covers all the separators.
	    words = ' '.join(split_many(without_digits, *ignore_punc, keep_empty=False)).split()
	    found = set()
	    for word in words:
	        if is_sequence_of_punctuation_chars(word):
	            continue
	        if word in JINJA_WORDS:
	            continue
	        found.add(word)
	    return found

	def test_find_jinja_vars_in_string():
	    """Check find_jinja_vars_in_string on a template mixing variables, filters, a quoted literal and stray punctuation."""
	    # The pipes are written plainly; a backslash before "|" is not a valid string escape.
	    template = "{{[application.name, activity.folder, gcs.name]|reject('undefined')|join(URLSEP) --- === .. .}} -- ..."
	    expected = {'URLSEP', 'activity.folder', 'application.name', 'gcs.name'}
	    result = find_jinja_vars_in_string(template)
	    assert expected == result

	# NOTE(review): module-level call -- the self-test runs whenever this module is loaded.
	test_find_jinja_vars_in_string()
No results found