Created
August 14, 2018 17:36
-
-
Save kanazux/d2b989d48bf4170726e99032693855ec to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
""" | |
Merge two PO/POT files.

Read each file into a defaultdict keyed by msgid, then look in the master
file's "#~" entries (called "fuzzy" by this script) for translations of
messages that exist only in the second file.
""" | |
import re | |
from argparse import ArgumentParser | |
from collections import defaultdict | |
def return_opts(args=None):
    """Parse the command-line options of the merge script.

    Args:
        args: Optional list of argument strings. When ``None`` (the
            default) argparse reads ``sys.argv[1:]``.

    Returns:
        The argparse namespace with ``pot_file`` (``-f``, the master file)
        and ``merge_file`` (``-m``, the second file).

    Raises:
        SystemExit: Raised by argparse when an option is missing or unknown.
    """
    parser = ArgumentParser(add_help=True,
                            description="Get pot files to merge")
    # Both paths are mandatory: without them the script would only fail
    # later with an opaque TypeError from open(None).
    parser.add_argument("-f", dest="pot_file", action="store",
                        required=True,
                        help="Get master file to merge.")
    parser.add_argument("-m", dest="merge_file", action="store",
                        required=True,
                        help="Get second file to merge.")
    return parser.parse_args(args)
class po_data():
    """Line-based reader for a gettext PO/POT file.

    The file is read once, blank lines are dropped, and the remaining lines
    are kept in ``self.data``. Depending on ``_fuzzy`` the instance collects
    either regular entries (``msgid`` / ``msgstr``) or the commented-out
    entries prefixed with ``#~`` (``#~ msgid`` / ``#~ msgstr``).

    Plural forms (``msgid_plural`` / ``msgstr[n]``) get no dedicated
    handling; they are matched by plain prefix like any other line.
    """

    def __init__(self, _file, _fuzzy=False):
        """Read ``_file`` and choose the entry prefixes to look for.

        Args:
            _file: Path of the PO/POT file to read.
            _fuzzy: When true, parse the ``#~`` entries instead of the
                regular ones.
        """
        # Use a context manager so the file handle is closed deterministically.
        with open(_file, 'r') as handle:
            # Non-empty lines of the file, in order.
            self.data = [line for line in handle.read().split("\n") if line]
        # Header lines, filled by get_head().
        self.head = []
        # Parsed entries keyed by msgid, filled by get_msgs().
        self.msgs = defaultdict(lambda: False)
        self.fuzzy = _fuzzy
        if self.fuzzy:
            self.msgid = "#~ msgid"
            self.msgstr = "#~ msgstr"
        else:
            self.msgid = "msgid"
            self.msgstr = "msgstr"

    def get_head(self):
        """Return the stripped lines that precede the first entry marker.

        The header ends at the first line starting with ``#:``, ``#|`` or
        ``#~``. The list is rebuilt on every call, so calling this method
        repeatedly always yields the same result.
        """
        head = []
        for line in self.data:
            if re.match(r"^#[:|~]", line):
                break
            head.append(line.strip())
        self.head = head
        return self.head

    def get_msgs(self):
        """Parse the entries that follow the header.

        Returns:
            ``self.msgs``: a defaultdict keyed by msgid. Multi-line msgids
            are joined with ``"|\\n|"``. Each value holds two lists:
            ``'lines'`` (the ``#:`` lines directly above the msgid, nearest
            first; always empty in fuzzy mode) and ``'msgstr'`` (the msgstr
            text followed by its continuation lines).
        """
        _data = self.data[len(self.get_head()):]
        total = len(_data)
        _msgid = None
        for idx, line in enumerate(_data):
            if line.startswith(self.msgid):
                # The prefix sits at column 0, so slicing it off equals the
                # anchored regex split used before.
                parts = [line[len(self.msgid):].strip()]
                pos = idx + 1
                # Continuation lines run until the matching msgstr; stop at
                # end of file instead of raising on a truncated entry.
                while pos < total and not _data[pos].startswith(self.msgstr):
                    parts.append(_data[pos])
                    pos += 1
                _msgid = "|\n|".join(parts)
                entry = defaultdict(lambda: False)
                entry['lines'] = []
                if not self.fuzzy:
                    # Walk upwards over the '#:' lines. The lower bound
                    # keeps negative indices from wrapping to the file end.
                    pos = idx - 1
                    while pos >= 0 and _data[pos].startswith('#:'):
                        entry['lines'].append(_data[pos])
                        pos -= 1
                entry['msgstr'] = []
                self.msgs[_msgid] = entry
            # A msgstr seen before any msgid has no entry to attach to and
            # is ignored.
            if line.startswith(self.msgstr) and _msgid is not None:
                translation = self.msgs[_msgid]['msgstr']
                translation.append(line[len(self.msgstr):].strip())
                pos = idx + 1
                while pos < total:
                    following = _data[pos]
                    # Any comment line or the next msgid ends the msgstr.
                    if following.startswith(("#", self.msgid)):
                        break
                    translation.append(following)
                    pos += 1
        return self.msgs
def merge_files(_master_file, _merge_file):
    """Merge the entries of ``_merge_file`` into ``_master_file``.

    The result is written to ``new_file.po`` in the current directory. The
    file is overwritten on every run, so a rerun cannot stack a second
    catalogue on top of an earlier one.

    * A msgid present in both files keeps the master translation and gains
      the ``#:`` lines of the merge file.
    * A msgid present only in the merge file is added. If the master file
      holds a ``#~`` entry with the same msgid, its non-empty msgstr lines
      are appended to the new entry and the ``#~`` entry is dropped.
    * The remaining ``#~`` entries of the master file are written last.

    Args:
        _master_file: Path of the master PO/POT file.
        _merge_file: Path of the file whose entries are merged in.
    """
    _master = po_data(_master_file).get_msgs()
    _fuzzy = po_data(_master_file, True).get_msgs()
    _head = po_data(_master_file).get_head()
    _merge = po_data(_merge_file).get_msgs()

    quote = re.compile(r'([^"|\\])(")([^"]|;)')
    double_quote = re.compile(r'([^\ |\\])("")')

    def _escape(text):
        # Apply the two quote-escaping substitutions, single quotes first.
        return double_quote.sub(r'\1\""', quote.sub(r'\1\\"\3', text))

    # One pass is enough: keys of _merge are unique, so adding a new msgid
    # to _master never changes the membership result of a later one.
    for _id in _merge:
        incoming = _merge[_id]
        if _id in _master:
            _master[_id]['lines'].extend(incoming['lines'])
            continue
        if _id in _fuzzy:
            incoming['msgstr'].extend(
                msg for msg in _fuzzy[_id]['msgstr'] if msg != '""')
            del _fuzzy[_id]
        _master[_id] = incoming

    with open('new_file.po', 'w') as new_po:
        if _head:
            for head in _head:
                new_po.write("{}\n".format(head))
            new_po.write("\n")

        for _id, entry in _master.items():
            for line in entry['lines']:
                new_po.write("{}\n".format(_escape(line)))
            msgids = _id.split("|\n|")
            new_po.write("msgid {}\n".format(_escape(msgids[0])))
            for msgid in msgids[1:]:
                new_po.write("{}\n".format(_escape(msgid)))
            translation = entry['msgstr']
            if translation:
                new_po.write("msgstr {}\n".format(_escape(translation[0])))
                for msg in translation[1:]:
                    new_po.write("{}\n".format(_escape(msg)))
            new_po.write("\n")

        for _id, entry in _fuzzy.items():
            lines = _id.split("|\n|")
            new_po.write("#~ msgid {}\n".format(lines[0]))
            for line in lines[1:]:
                new_po.write("{}\n".format(line))
            translation = entry['msgstr']
            if translation:
                new_po.write("#~ msgstr {}\n".format(translation[0]))
                # Skip the first element: it was just written on the
                # '#~ msgstr' line and must not be repeated.
                for line in translation[1:]:
                    new_po.write("{}\n".format(line))
            new_po.write("\n")
if __name__ == "__main__":
    # Script entry point: read the two paths from the command line and
    # merge the second file into the first.
    cli = return_opts()
    merge_files(_master_file=cli.pot_file, _merge_file=cli.merge_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment