Last active
August 29, 2015 14:23
-
-
Save ogroleg/df951cea0fe33e5e8356 to your computer and use it in GitHub Desktop.
merge docx files by Heading1 and Heading2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob | |
from docx import Document as Document | |
from docx.oxml.text.paragraph import CT_P | |
from docx.oxml.section import CT_SectPr | |
def temp2(x):
    """Return ``x.style`` when ``x`` is a ``CT_P`` element, otherwise ``None``.

    Args:
        x: A child element of a document body.

    Returns:
        The element's ``style`` attribute for ``CT_P`` instances; ``None``
        for every other element type.
    """
    return x.style if isinstance(x, CT_P) else None


def temp3(x):
    """Return ``x`` unchanged, or ``''`` when it carries no usable text.

    A value counts as unusable when it is falsy (for example ``None`` or
    ``''``) or is exactly the string ``'None'``.

    Args:
        x: A text value, possibly ``None``.

    Returns:
        ``x`` itself, or the empty string for the unusable cases above.
    """
    return x if x != 'None' and x else ''
def proceed_doc(d):
    """Split one document's body elements into numbered sections.

    Walks ``d`` in order. Every element for which ``temp2`` reports the style
    ``'Heading1'`` or ``'Heading2'`` starts a new section; the section's key
    is the number ``get_num`` extracts from the heading's concatenated child
    text. The heading element and all elements up to the next heading are
    stored as a list under that key in the module-level ``res`` dict.

    Elements that appear before the first numbered heading, and elements that
    follow a heading for which ``get_num`` returns ``None``, are not stored.

    Args:
        d: Sequence of body child elements of one document.

    Raises:
        ValueError: If a section number is already present in ``res``.
    """

    def _store(num, elements):
        # Commit a finished section; a heading without a number has no key.
        if num is None:
            return
        if num in res:
            raise ValueError("Key is already present:" + str(num))
        res[num] = elements

    curr_num, curr_ps = None, []
    for element in d:
        if temp2(element) in ('Heading1', 'Heading2'):
            _store(curr_num, curr_ps)
            # Iterating an element yields its children directly.
            heading_text = ''.join(temp3(child.text) for child in element)
            curr_num = get_num(heading_text)
            curr_ps = [element]
        elif curr_num is not None:
            # Compare against None so that section number 0 is kept.
            curr_ps.append(element)
    # The last section has no following heading to trigger its commit.
    _store(curr_num, curr_ps)
def get_num(e):
    """Return the first run of decimal digits in ``e`` as an ``int``.

    The text is scanned left to right and the first unbroken run of decimal
    digits is converted. A run that reaches the end of the text is converted
    as well.

    Args:
        e: Text to scan.

    Returns:
        The integer value of the first digit run, or ``None`` when ``e``
        contains no decimal digit.
    """
    digits = []
    for ch in e:
        # isdecimal() accepts exactly the characters int() can parse;
        # isdigit() would also accept e.g. superscripts and make int() raise.
        if ch.isdecimal():
            digits.append(ch)
        elif digits:
            break
    return int(''.join(digits)) if digits else None
def main():
    """Merge the sections of every ``*.docx`` file here into ``res.docx``.

    Loads each matching file except ``res.docx`` itself, lets ``proceed_doc``
    collect its sections into the module-level ``res`` dict, then inserts the
    collected elements into a new document in ascending key order and saves
    it as ``res.docx``.
    """
    global res, r

    # All inputs are loaded before any shared state is (re)initialised.
    bodies = []
    for path in glob.glob('*.docx'):
        if path == 'res.docx':
            continue
        bodies.append(Document(path).element.body.getchildren())

    res = {}
    r = Document()

    for body_children in bodies:
        proceed_doc(body_children)

    # NOTE(review): insertion starts at index 1 of the new body — confirm
    # what a fresh Document's body holds at index 0.
    position = 1
    for key in sorted(res):
        for element in res[key]:
            r.element.body.insert(position, element)
            position += 1

    r.save('res.docx')
# Run the merge only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment