-
-
Save adejones/a6d42984f66ea9990d78974531863bee to your computer and use it in GitHub Desktop.
def docx_replace(doc, data):
    """Replace ``${key}`` placeholders in a python-docx document, in place.

    Every body paragraph and every paragraph inside the cells of the
    document's top-level tables is searched.  A placeholder may sit inside a
    single run or be split across several consecutive runs; either way the
    replacement text is written into the run where the placeholder starts,
    so it keeps that run's character formatting.

    Only what is reachable through ``doc.paragraphs`` and ``doc.tables`` is
    visited, and only text held in each paragraph's ``runs`` is searched.

    Args:
        doc: Document-like object exposing ``paragraphs`` and ``tables``
            (for example the object returned by ``docx.Document(...)``).
        data: Mapping of placeholder name to replacement value.  The key
            ``"Name"`` matches the text ``${Name}``; values are converted
            with ``str()``.

    Returns:
        None.  ``doc`` is modified in place.
    """
    paragraphs = list(doc.paragraphs)
    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                paragraphs.extend(cell.paragraphs)

    # use placeholders in the form ${PlaceholderName}
    placeholders = [('${{{}}}'.format(key), str(val)) for key, val in data.items()]

    for paragraph in paragraphs:
        runs = paragraph.runs
        if not runs:
            continue
        for key_name, replacement in placeholders:
            _replace_in_runs(runs, key_name, replacement)


def _replace_in_runs(runs, key_name, replacement):
    """Replace every occurrence of ``key_name`` in the joined text of ``runs``."""
    search_from = 0
    while True:
        texts = [run.text for run in runs]
        start = ''.join(texts).find(key_name, search_from)
        if start == -1:
            return
        end = start + len(key_name)

        offset = 0
        first_piece = True
        for run, text in zip(runs, texts):
            run_start = offset
            offset += len(text)
            # Portion of this run's text that overlaps the match, if any.
            lo = max(start, run_start) - run_start
            hi = min(end, offset) - run_start
            if lo >= hi:
                # No overlap: leave the run untouched so that runs without
                # matching text are never rewritten.
                continue
            # The run where the match starts receives the replacement; later
            # runs only lose their share of the placeholder characters.
            inserted = replacement if first_piece else ''
            run.text = text[:lo] + inserted + text[hi:]
            first_piece = False

        # Resume after the inserted text so that a replacement which itself
        # contains the placeholder cannot cause an endless loop.
        search_from = start + len(replacement)
# usage
import docx  # provided by the python-docx package

# Load the template, fill in the ${ItemOne} / ${ItemTwo} placeholders, and
# write the result to a new file.
doc = docx.Document('path/to/template.docx')
docx_replace(doc, dict(ItemOne='replacement text', ItemTwo="Some replacement text\nand some more"))
doc.save('path/to/destination.docx')
this doesn't work
this doesn't work
@handhikadj I haven’t needed this code for a couple of years so the docx module may have changed since writing. What are you referring to?
Greetings! Do you have any updates on this code? I've been trying to use this but I'm afraid it doesn't work anymore. I tried to use the code with the usage described in the comments, but it simply does nothing.
Redirected from https://stackoverflow.com/a/55733040
Code snippet works great, thank you! 😄
Updated to support multiple replaces of the same KEY in a PARAGRAPH
https://gist.github.com/GastonDonnet/cff16e773a6245e536f957bd8b5eba6c
Updated to support words that span multiple runs
https://gist.github.com/heimoshuiyu/671a4dfbd13f7c279e85224a5b6726c0
Hello folks! I tried this code on a complex document I have, but unfortunately I got an index-out-of-range exception. Based on this solution I developed a new one, put it in a repo and published it as a new package on PyPI. Feel free to check all the information there and contribute. I described the strategy I used to ensure that the formatting is maintained.
The repo is: https://github.com/ivanbicalho/python-docx-replace
But if you prefer, you can install and use it:
pip3 install python-docx-replace
from docx import Document  # python-docx; needed for Document() below
from python_docx_replace.docx_replace import docx_replace

# get your document using python-docx
doc = Document("document.docx")
# call the replace function with your key value pairs
docx_replace(doc, name="Ivan", phone="+55123456789")
# do whatever you want after that, usually save the document
doc.save("replaced.docx")
Hello folks! I tried this code on a complex document I have, but unfortunately I got an index-out-of-range exception. Based on this solution I developed a new one, put it in a repo and published it as a new package on PyPI. Feel free to check all the information there and contribute. I described the strategy I used to ensure that the formatting is maintained.
The repo is: https://github.com/ivanbicalho/python-docx-replace
But if you prefer, you can install and use it:
pip3 install python-docx-replace
from docx import Document  # python-docx; needed for Document() below
from python_docx_replace.docx_replace import docx_replace

# get your document using python-docx
doc = Document("document.docx")
# call the replace function with your key value pairs
docx_replace(doc, name="Ivan", phone="+55123456789")
# do whatever you want after that, usually save the document
doc.save("replaced.docx")
This does not work with tables in header/footer/body
Hey @marcopin, it was fixed in 0.4.2: https://pypi.org/project/python-docx-replace
Hey @marcopin, it was fixed in 0.4.2: https://pypi.org/project/python-docx-replace
Great news!
Good job!
This is awesome, thank you!
There is an npm package named edit-office-file that can search and replace multiple text strings inside a DOCX file as well as other Office files.
Hi adejones, I added a check to your code to verify whether all the strings were replaced.
I cannot do a pull request on a gist, but in my fork you can see what I added for your consideration.
Best, Christien