Skip to content

Instantly share code, notes, and snippets.

@kanazux
Created August 14, 2018 17:36
Show Gist options
  • Save kanazux/d2b989d48bf4170726e99032693855ec to your computer and use it in GitHub Desktop.
Save kanazux/d2b989d48bf4170726e99032693855ec to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Merge two PO/POT files.
Parse each file into a defaultdict of messages, then reuse translations kept
in the master file's "#~" (obsolete, here called fuzzy) entries for new ids.
"""
import re
from argparse import ArgumentParser
from collections import defaultdict
def return_opts(argv=None):
    """Parse the command-line options of the merge script.

    Args:
        argv: Optional list of argument strings to parse. ``None`` (the
            default) makes argparse read the real command line, which is
            what the script entry point relies on.

    Returns:
        argparse.Namespace with ``pot_file`` (value of ``-f``) and
        ``merge_file`` (value of ``-m``).

    Raises:
        SystemExit: raised by argparse when ``-f`` or ``-m`` is missing or
            an unknown option is supplied.
    """
    parser = ArgumentParser(add_help=True,
                            description="Get pot files to merge")
    # Both paths are mandatory: without them the caller would hand ``None``
    # to ``open()`` and fail with an unrelated TypeError much later.
    parser.add_argument("-f", dest="pot_file", action="store", required=True,
                        help="Get master file to merge.")
    parser.add_argument("-m", dest="merge_file", action="store",
                        required=True, help="Get second file to merge.")
    return parser.parse_args(argv)
class po_data():
    """Line-based reader for a gettext PO/POT file.

    The file is loaded once as a list of non-empty lines. Depending on the
    ``_fuzzy`` flag the parser looks either at regular ``msgid``/``msgstr``
    entries or at the ``#~ msgid``/``#~ msgstr`` entries (the ones the
    script calls "fuzzy").
    """

    def __init__(self, _file, _fuzzy=False):
        """Load ``_file`` and select which keyword prefixes to parse.

        Args:
            _file: Path of the PO/POT file to read.
            _fuzzy: When true, parse the ``#~``-prefixed entries instead of
                the regular ones.
        """
        # Context manager so the handle is closed right after reading.
        with open(_file, 'r') as handle:
            # Blank lines are dropped; entry boundaries are recovered from
            # the keywords instead.
            self.data = [row for row in handle.read().split("\n") if row]
        # Header lines found by the last get_head() call.
        self.head = []
        # Maps a msgid key to a dict holding its 'lines' and 'msgstr' lists.
        self.msgs = defaultdict(lambda: False)
        self.fuzzy = _fuzzy
        if self.fuzzy:
            self.msgid = "#~ msgid"
            self.msgstr = "#~ msgstr"
        else:
            self.msgid = "msgid"
            self.msgstr = "msgstr"

    def get_head(self):
        """Return the stripped lines that precede the first entry marker.

        The header ends at the first line starting with ``#:``, ``#|`` or
        ``#~``. The list is rebuilt on every call, so calling this method
        repeatedly (get_msgs() also calls it) never duplicates the header.
        """
        header = []
        for line in self.data:
            if line.startswith(("#:", "#|", "#~")):
                break
            header.append(line.strip())
        self.head = header
        return self.head

    def get_msgs(self):
        """Parse the entries that follow the header.

        Returns:
            ``self.msgs``: every key is the msgid text, with continuation
            lines joined by ``"|\\n|"``; every value is a dict with

            * ``'lines'``: the ``#:`` lines directly above the msgid,
              nearest first (always empty in fuzzy mode);
            * ``'msgstr'``: the msgstr text followed by its continuation
              lines.
        """
        entries = self.data[len(self.get_head()):]
        total = len(entries)
        current = None  # key of the entry opened by the latest msgid line
        for idx, line in enumerate(entries):
            if line.startswith(self.msgid):
                parts = [line[len(self.msgid):].strip()]
                # Everything up to the matching msgstr belongs to the msgid.
                # The bound check keeps a truncated file from raising
                # IndexError.
                ahead = idx + 1
                while (ahead < total
                       and not entries[ahead].startswith(self.msgstr)):
                    parts.append(entries[ahead])
                    ahead += 1
                current = "|\n|".join(parts)
                entry = defaultdict(lambda: False)
                entry['lines'] = []
                if not self.fuzzy:
                    # Walk upwards over the '#:' lines. Stopping at index 0
                    # prevents a negative index from wrapping around to the
                    # end of the file.
                    behind = idx - 1
                    while behind >= 0 and entries[behind].startswith('#:'):
                        entry['lines'].append(entries[behind])
                        behind -= 1
                entry['msgstr'] = []
                self.msgs[current] = entry
            elif line.startswith(self.msgstr):
                if current is None:
                    # A msgstr with no preceding msgid has nothing to
                    # attach to.
                    continue
                translation = self.msgs[current]['msgstr']
                translation.append(line[len(self.msgstr):].strip())
                # Continuation lines run until the next comment or msgid
                # line, or the end of the file.
                ahead = idx + 1
                while ahead < total:
                    follower = entries[ahead]
                    if follower.startswith(("#", self.msgid)):
                        break
                    translation.append(follower)
                    ahead += 1
        return self.msgs
def merge_files(_master_file, _merge_file, _out_file='new_file.po'):
    """Merge the entries of ``_merge_file`` into ``_master_file``.

    Steps:

    * ids present in both files keep the master translation and gain the
      ``#:`` lines of the merge file;
    * ids only present in the merge file are added to the result; when the
      master file has a ``#~`` entry with the same id, its non-empty msgstr
      lines are appended to the new entry and that ``#~`` entry is dropped;
    * the master header, all regular entries and the remaining ``#~``
      entries are written to ``_out_file``.

    Args:
        _master_file: Path of the master PO/POT file.
        _merge_file: Path of the file whose entries are merged in.
        _out_file: Path of the file to write. It is overwritten, so running
            the merge again does not stack a second copy of every entry on
            top of the previous output.
    """
    _master = po_data(_master_file).get_msgs()
    _fuzzy = po_data(_master_file, True).get_msgs()
    # A fresh instance is used on purpose so the header is read exactly once.
    _head = po_data(_master_file).get_head()
    _merge = po_data(_merge_file).get_msgs()
    quote = re.compile(r'([^"|\\])(")([^"]|;)')
    double_quote = re.compile(r'([^\ |\\])("")')

    def _escape(text):
        # Backslash-escape quotes found inside a line, then '""' pairs.
        return double_quote.sub(r'\1\""', quote.sub(r'\1\\"\3', text))

    # Plain dict membership: no per-id key list, and ``in`` does not trigger
    # the defaultdict factory.
    for _id in _merge:
        if _id in _master:
            _master[_id]['lines'].extend(_merge[_id]['lines'])
            continue
        if _id in _fuzzy:
            _merge[_id]['msgstr'].extend(
                msg for msg in _fuzzy[_id]['msgstr'] if msg != '""')
            del _fuzzy[_id]
        _master[_id] = _merge[_id]

    with open(_out_file, 'w') as new_po:
        if _head:
            for head in _head:
                new_po.write("{}\n".format(head))
            new_po.write("\n")
        for _id, entry in _master.items():
            for line in entry['lines']:
                new_po.write("{}\n".format(_escape(line)))
            msgids = _id.split("|\n|")
            new_po.write("msgid {}\n".format(_escape(msgids[0])))
            for extra in msgids[1:]:
                new_po.write("{}\n".format(_escape(extra)))
            msgstr = entry['msgstr']
            if msgstr:
                new_po.write("msgstr {}\n".format(_escape(msgstr[0])))
                for extra in msgstr[1:]:
                    new_po.write("{}\n".format(_escape(extra)))
            new_po.write("\n")
        for _id, entry in _fuzzy.items():
            lines = _id.split("|\n|")
            new_po.write("#~ msgid {}\n".format(lines[0]))
            for extra in lines[1:]:
                new_po.write("{}\n".format(extra))
            msgstr = entry['msgstr']
            # Guard against an entry without msgstr, and emit only the
            # continuation lines after the first one so it is not repeated.
            if msgstr:
                new_po.write("#~ msgstr {}\n".format(msgstr[0]))
                for extra in msgstr[1:]:
                    new_po.write("{}\n".format(extra))
            new_po.write("\n")
if __name__ == "__main__":
    # Script entry point: read the two paths from the command line and merge.
    cli_args = return_opts()
    merge_files(cli_args.pot_file, cli_args.merge_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment