-
-
Save adejones/a6d42984f66ea9990d78974531863bee to your computer and use it in GitHub Desktop.
def docx_replace(doc, data):
    """Replace ``${Key}`` placeholders in a python-docx document, in place.

    Every paragraph in the document body and in every cell of the
    document's tables is searched.  A placeholder may be contained in one
    run or split across several consecutive runs; all occurrences are
    replaced.

    Args:
        doc: Document-like object exposing ``paragraphs`` and ``tables``
            (tables expose ``rows`` -> ``cells`` -> ``paragraphs``); each
            paragraph exposes ``runs`` whose items have a read/write
            ``text`` attribute.
        data: Mapping of placeholder name to replacement value.  The key
            ``Name`` matches the text ``${Name}``; values are converted
            with ``str()``.

    Returns:
        None.  The runs of ``doc`` are modified in place.
    """
    paragraphs = list(doc.paragraphs)
    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                paragraphs.extend(cell.paragraphs)

    # use placeholders in the form ${PlaceholderName}
    replacements = [('${{{}}}'.format(key), str(val)) for key, val in data.items()]

    for paragraph in paragraphs:
        runs = paragraph.runs
        for key_name, value in replacements:
            _replace_across_runs(runs, key_name, value)


def _replace_across_runs(runs, key_name, value):
    """Replace every occurrence of key_name in the concatenated run text."""
    search_from = 0
    while True:
        texts = [run.text for run in runs]
        # Search the joined text so a placeholder split over several runs
        # is found no matter where the run boundaries fall.
        start = ''.join(texts).find(key_name, search_from)
        if start < 0:
            return
        end = start + len(key_name)

        run_start = 0
        for run, text in zip(runs, texts):
            run_end = run_start + len(text)
            # Only runs that actually hold characters of the match are
            # rewritten; every other run is left untouched.
            if text and run_start < end and run_end > start:
                lo = max(start, run_start) - run_start
                hi = min(end, run_end) - run_start
                # The run holding the first matched character receives the
                # replacement (so it takes that run's style); later runs
                # only lose their fragment of the placeholder.
                insert = value if run_start <= start else ''
                run.text = text[:lo] + insert + text[hi:]
            run_start = run_end

        # Resume after the inserted value so a replacement that itself
        # contains the placeholder cannot cause an endless loop.
        search_from = start + len(value)
# usage
# NOTE: `docx` is the python-docx package; it must be imported (`import docx`)
# before running this example.
doc = docx.Document('path/to/template.docx')
# Keys are placeholder names: ItemOne replaces the text ${ItemOne} in the template.
docx_replace(doc, dict(ItemOne='replacement text', ItemTwo="Some replacement text\nand some more"))
doc.save('path/to/destination.docx')
Hello folks! I tried this code for a complex document I have, but unfortunately I got an out of index exception. But based on this solution I developed a new one, put it in a repo and published a new package on PyPI. Feel free to check all the information there and contribute. I described the strategy I used to ensure that the format is maintained.
The repo is: https://github.com/ivanbicalho/python-docx-replace
But if you prefer, you can install and use it:
pip3 install python-docx-replace
# Usage example for the separately published python-docx-replace package.
# Its docx_replace takes the replacements as keyword arguments.
from python_docx_replace.docx_replace import docx_replace
# get your document using python-docx
# NOTE(review): `Document` is not imported in this snippet; presumably
# `from docx import Document` (python-docx) is required — confirm.
doc = Document("document.docx")
# call the replace function with your key value pairs
docx_replace(doc, name="Ivan", phone="+55123456789")
# do whatever you want after that, usually save the document
doc.save("replaced.docx")
Hello folks! I tried this code for a complex document I have, but unfortunately I got an out of index exception. But based on this solution I developed a new one, put it in a repo and published a new package on PyPI. Feel free to check all the information there and contribute. I described the strategy I used to ensure that the format is maintained.
The repo is: https://github.com/ivanbicalho/python-docx-replace
But if you prefer, you can install and use it:
pip3 install python-docx-replace
from python_docx_replace.docx_replace import docx_replace
# get your document using python-docx
# NOTE(review): `Document` is not imported in this snippet; presumably
# `from docx import Document` (python-docx) is required — confirm.
doc = Document("document.docx")
# call the replace function with your key value pairs
docx_replace(doc, name="Ivan", phone="+55123456789")
# do whatever you want after that, usually save the document
doc.save("replaced.docx")
This does not work with tables in header/footer/body
Hey @marcopin, it was fixed in version 0.4.2: https://pypi.org/project/python-docx-replace
Hey @marcopin, it was fixed in version 0.4.2: https://pypi.org/project/python-docx-replace
Great news!
Good job!
This is awesome, thank you!
There is an npm package named edit-office-file that can search and replace multiple text strings inside a DOCX file, as well as in other Office files.
Update to support words that span multiple runs:
https://gist.github.com/heimoshuiyu/671a4dfbd13f7c279e85224a5b6726c0