Created
February 16, 2015 17:06
-
-
Save mlent/7e45b67732eb18d51a30 to your computer and use it in GitHub Desktop.
Extract in-use translation strings
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import fnmatch | |
import sys | |
import os | |
from pprint import pprint | |
import json | |
# Matches a template interpolation that pipes a quoted key through the i18n
# filter, e.g. {{ 'home.title' | i18n }}.  The pipe is escaped so it is a
# literal character instead of a regex alternation (unescaped, it made the
# pattern accept any quoted string after "{{" and produce empty matches for a
# bare "i18n }}").  The closing braces are deliberately not required, so
# filter arguments or further chained filters after `i18n` do not hide a key.
I18N_KEY_PATTERN = re.compile(r"\{\{\s*'([0-9a-zA-Z._]+?)'\s*\|\s*i18n\b")


def extract_i18n_keys(line):
    """Return every translation key referenced on *line*, in order of appearance.

    An empty list is returned when the line contains no i18n interpolation.
    """
    # Strip non-breaking spaces: "\xc2\xa0" is the UTF-8 byte pair for U+00A0
    # (lines are byte strings on Python 2), which \s would not otherwise match.
    return I18N_KEY_PATTERN.findall(line.replace("\xc2\xa0", " "))


# Collect every key used by any HTML template below the 'scripts' directory.
# Duplicates are kept here; consumers de-duplicate when reporting.
in_use = []
for root, dirnames, filenames in os.walk('scripts'):
    for filename in fnmatch.filter(filenames, '*.html'):
        # The context manager closes each template as soon as it is scanned.
        with open(os.path.join(root, filename)) as html_file:
            for line in html_file:
                # Keep all keys on the line, not just the first one.
                in_use.extend(extract_i18n_keys(line))
# Flatten!
def iteritems_nested(d):
    """Lazily walk a nested dict and yield one ``(key_path, value)`` per leaf.

    ``key_path`` is the list of keys leading from the top level down to the
    leaf.  Any value that is not a dict counts as a leaf; if *d* itself is not
    a dict, a single pair with an empty path is produced.  Empty dicts
    contribute nothing.
    """
    def walk(path, node):
        # Leaves end the recursion: report them with the path collected so far.
        if not isinstance(node, dict):
            yield (path, node)
            return
        for key, child in node.items():
            # Build a new list per branch so sibling paths never share state.
            for pair in walk(path + [key], child):
                yield pair

    return walk([], d)
def flatten_dict(d):
    """Collapse a nested dict into a one-level dict keyed by dotted paths.

    Every leaf yielded by ``iteritems_nested(d)`` is stored under the
    '.'-joined sequence of keys leading to it, e.g. ``{'a': {'b': 1}}``
    becomes ``{'a.b': 1}``.
    """
    flat = {}
    for key_path, leaf in iteritems_nested(d):
        flat['.'.join(key_path)] = leaf
    return flat
# Now read in our dict and see what's missing
with open('locales/en.json') as data_file:
    data = json.load(data_file)
in_dict = flatten_dict(data)

# Test membership against a set so the comparison stays linear instead of
# rescanning the whole in_use list for every dictionary key.
in_use_keys = set(in_use)

# Keys defined in the dictionary that no template references.
not_found = [key for key in in_dict if key not in in_use_keys]
# Keys referenced by templates that the dictionary does not define.
not_added = [key for key in in_use if key not in in_dict]

# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print("Fragments in dictionary, not in UI")
output_not_found = sorted(set(not_found))
# pprint() writes to stdout itself and returns None, so wrapping it in a
# print would emit a stray "None" line after the listing.
pprint(output_not_found)
print(len(output_not_found))
print("\n\n")
print("Fragments in UI, not in dictionary")
output_not_added = sorted(set(not_added))
pprint(output_not_added)
print(len(output_not_added))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment