Skip to content

Instantly share code, notes, and snippets.

@leontiy
Last active August 29, 2015 14:01
Show Gist options
  • Save leontiy/16b105c9398b23a48269 to your computer and use it in GitHub Desktop.
Save leontiy/16b105c9398b23a48269 to your computer and use it in GitHub Desktop.
Merge localisation files
#!/usr/bin/python
import re
import sys
import codecs
# Entry format with a leading comment:
#     /* comment */
#     "key" = "value";
# The named groups `comment`, `key` and `value` capture the three parts.
# re.S lets `.` span newlines, so multi-line comments and values match.
generated_regex = re.compile(r"/\*(?P<comment>.*?)\*/[\n\r]*\"(?P<key>.*?)\"\s*=\s*\"(?P<value>.*?)\";", re.S)
# Entry format without a comment:  "key" = "value";
# The trailing empty `comment` group always matches the empty string, so
# callers can read m.group("comment") for both patterns.
handcrafted_regex = re.compile(r"\"(?P<key>.*?)\"\s*=\s*\"(?P<value>.*?)\";(?P<comment>)", re.S)
def parse(file_name, regex, encoding = "utf-8"):
    """Read a localisation file and return its entries.

    Args:
        file_name: Path of the file to read.
        regex: Pattern (compiled or plain string) that defines the named
            groups ``key``, ``value`` and ``comment``.
        encoding: Text encoding used to decode the file.

    Returns:
        A list of ``(key, value, comment)`` tuples, one per match, in the
        order the matches occur in the file.
    """
    with codecs.open(file_name, mode="rb", encoding=encoding) as source:
        contents = source.read()
    # re.finditer accepts both compiled patterns and pattern strings.
    return [
        (m.group("key"), m.group("value"), m.group("comment"))
        for m in re.finditer(regex, contents)
    ]
def merge(old, new):
    """Merge two lists of ``(key, value, comment)`` entries.

    For every key found in either list:

    * the value is the non-empty value from ``new`` if there is one,
      otherwise the value from ``old``, otherwise the empty string;
    * the comment is the non-empty comment from ``new`` if there is one,
      otherwise the non-empty comment from ``old``, otherwise the
      placeholder " No comment provided by engineer. ".

    Args:
        old: Entries from the existing file.
        new: Entries that take precedence over ``old``.

    Returns:
        A list of ``(key, value, comment)`` tuples sorted by comment, with
        the key as a tie-breaker so the output order is reproducible.
    """
    default_comment = " No comment provided by engineer. "
    old_val = {entry[0]: entry[1] for entry in old}
    new_val = {entry[0]: entry[1] for entry in new}
    old_comment = {entry[0]: entry[2] for entry in old}
    new_comment = {entry[0]: entry[2] for entry in new}

    merged = []
    for key in set(old_val) | set(new_val):
        # An empty value/comment deliberately falls through to the older
        # one: comment-less input always yields "" as its comment.  The
        # final `or ""` keeps the value a string instead of leaking False.
        value = new_val.get(key) or old_val.get(key) or ""
        comment = new_comment.get(key) or old_comment.get(key) or default_comment
        merged.append((key, value, comment))
    return sorted(merged, key=lambda entry: (entry[2], entry[0]))
def genstrings_print_triplet(file, triplet):
    """Write one entry to ``file`` as a comment line plus a key/value line.

    Output has the form::

        /*<comment>*/
        "<key>" = "<value>";
        <blank line>

    Args:
        file: Writable text stream (anything with a ``write`` method).
        triplet: ``(key, value, comment)`` sequence.
    """
    key, value, comment = triplet[0], triplet[1], triplet[2]
    # file.write works on Python 2 and 3; the final "\n" is the newline
    # that the former `print >> file` statement appended.
    file.write('/*%s*/\n"%s" = "%s";\n\n' % (comment, key, value))
def csv_print_triplet(file, triplet):
    """Write one entry to ``file`` as a tab-separated line.

    The column order is key, comment, value. Fields are written as-is,
    without quoting or escaping.

    Args:
        file: Writable text stream (anything with a ``write`` method).
        triplet: ``(key, value, comment)`` sequence.
    """
    key, value, comment = triplet[0], triplet[1], triplet[2]
    # file.write works on Python 2 and 3; the trailing "\n" is the newline
    # that the former `print >> file` statement appended.
    file.write("%s\t%s\t%s\n" % (key, comment, value))
def print_content(file_name, triplets, formatter = genstrings_print_triplet):
    """Write every entry to ``file_name`` as UTF-8 text.

    Args:
        file_name: Path of the output file; opened in "w" mode.
        triplets: Iterable of ``(key, value, comment)`` entries.
        formatter: Callable ``formatter(stream, triplet)`` that writes one
            entry to the open stream.
    """
    with codecs.open(file_name, "w", encoding="utf-8") as out:
        for entry in triplets:
            formatter(out, entry)
def main():
    """Command-line entry point: merge two localisation files.

    Expects exactly three arguments: <old file> <new file> <output file>.
    The old file is parsed with ``generated_regex`` (entries preceded by a
    comment), the new file with ``handcrafted_regex`` (entries without
    comments). The merged entries are written to the output file. With any
    other argument count the usage text is printed and nothing is written.
    """
    if len(sys.argv) != 4:
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("This script merges existing and new localization files by adding new key-value pairs (replacing old ones).")
        print("Usage: merge-localizations <old file> <new file> <output file>")
        return

    old_content = parse(sys.argv[1], generated_regex)
    print("Old: %d" % len(old_content))
    new_content = parse(sys.argv[2], handcrafted_regex)
    print("New %d" % len(new_content))

    merged_content = merge(old_content, new_content)
    print_content(sys.argv[3], merged_content)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment