Last active
November 15, 2020 14:36
-
-
Save joeld1/6b86dfd1bc1128b3bde10a857a7caaa2 to your computer and use it in GitHub Desktop.
Collection of python-docx hyperlink snippets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import docx | |
from docx.text.run import Run | |
def add_hyperlink_into_run(paragraph, run, url):
    """Wrap an existing run in a ``w:hyperlink`` element pointing at *url*.

    Adapted from
    https://github.com/python-openxml/python-docx/issues/610#issuecomment-458289054

    The hyperlink element is inserted at the position the run currently
    occupies and the run is then moved inside it, so the order of the
    surrounding paragraph content is preserved. The run's font colour is set
    to blue (0, 0, 255).

    :param paragraph: The paragraph the hyperlink is added to; its part
        receives the new external relationship.
    :param run: The run whose content becomes the clickable label.
    :param url: A string containing the target address.
    :return: None
    """
    # --- This gets access to the document.xml.rels file and gets a new relation id value ---
    part = paragraph.part
    r_id = part.relate_to(
        url, docx.opc.constants.RELATIONSHIP_TYPE.HYPERLINK, is_external=True
    )

    # --- Create the w:hyperlink tag and add needed values ---
    hyperlink = docx.oxml.shared.OxmlElement('w:hyperlink')
    hyperlink.set(docx.oxml.shared.qn('r:id'), r_id)

    # Anchor on the run element itself instead of looking the run up by its
    # text: a text comparison picks the wrong run when two runs share the same
    # text, and an index into ``paragraph.runs`` is not a valid child index of
    # ``paragraph._p`` once non-run children (e.g. w:pPr) are present.
    r = run._r
    if r.getparent() is paragraph._p:
        r.addprevious(hyperlink)
    else:
        # The run is not a direct child of this paragraph; fall back to
        # placing the hyperlink at the end of the paragraph.
        paragraph._p.append(hyperlink)
    hyperlink.append(r)

    run.font.color.rgb = docx.shared.RGBColor(0, 0, 255)
def add_hyperlink(paragraph, url, text, color=None, underline=True):
    """Append a new hyperlink to the end of a paragraph.

    Adapted from
    https://github.com/python-openxml/python-docx/issues/74#issuecomment-261169410

    :param paragraph: The paragraph we are adding the hyperlink to.
    :param url: A string containing the required url.
    :param text: The text displayed for the url.
    :param color: Value written to the ``w:val`` attribute of a ``w:color``
        element (e.g. ``'FF8822'``), or None to emit no ``w:color`` element.
    :param underline: When falsy, a ``w:u`` element with ``w:val="none"`` is
        added; when truthy, no ``w:u`` element is emitted.
    :return: The hyperlink object (the ``w:hyperlink`` element).
    """
    # This gets access to the document.xml.rels file and gets a new relation id value
    part = paragraph.part
    r_id = part.relate_to(
        url, docx.opc.constants.RELATIONSHIP_TYPE.HYPERLINK, is_external=True
    )

    # Create the w:hyperlink tag and add needed values
    hyperlink = docx.oxml.shared.OxmlElement('w:hyperlink')
    hyperlink.set(docx.oxml.shared.qn('r:id'), r_id)

    # Create a w:r element and the w:rPr element holding its properties
    new_run = docx.oxml.shared.OxmlElement('w:r')
    rPr = docx.oxml.shared.OxmlElement('w:rPr')

    # Add color if it is given
    if color is not None:
        c = docx.oxml.shared.OxmlElement('w:color')
        c.set(docx.oxml.shared.qn('w:val'), color)
        rPr.append(c)

    # Remove underlining if it is requested
    if not underline:
        u = docx.oxml.shared.OxmlElement('w:u')
        u.set(docx.oxml.shared.qn('w:val'), 'none')
        rPr.append(u)

    # Join all the xml elements together and add the required text to the w:r element
    new_run.append(rPr)
    new_run.text = text
    hyperlink.append(new_run)

    paragraph._p.append(hyperlink)

    return hyperlink
########################################################################################################################################
# https://github.com/python-openxml/python-docx/issues/610#issuecomment-458289054
# --- this document contains one paragraph and one run with the following text:
# --- "Please refer to document 1 (doc. 1). More testing sentences. "
doc = docx.Document('testing runs.docx')

# --- This is the pattern to match the string '(doc. #)' together with the one
# --- character that follows it. Note the dots are unescaped, so each of them
# --- matches any character. A raw string is used so that `\(` and `\)` reach
# --- the regex engine without being treated as (invalid) string escapes.
pattern = r"\(doc. [0-9]\)."

for paragraph in doc.paragraphs:
    for run in paragraph.runs:
        matches = re.findall(pattern, run.text)
        if not matches:
            continue
        print("Found a match!")

        # --- this replaces the pattern we wrote above with nothing.
        run.text = re.sub(pattern, "", run.text)

        # --- if there are more than one instances of '(doc. #)' in the run,
        # --- we want to replace all of them. Every new run is inserted
        # --- directly after the original run, so the matches are walked in
        # --- reverse to keep them in their original left-to-right order.
        # --- All links still end up after the remaining run text; this logic
        # --- might not work perfectly but it's a start.
        for match in reversed(matches):
            new_run_element = paragraph._element._new_r()
            run._element.addnext(new_run_element)
            new_run = Run(new_run_element, run._parent)
            new_run.text = match + " "
            add_hyperlink_into_run(paragraph, new_run, "http://google.com")

doc.save('testing-runs-complete.docx')
########################################################################################################################################
# https://github.com/python-openxml/python-docx/issues/74#issuecomment-261169410
# Build a fresh document with a single paragraph that receives two links.
document = docx.Document()
p = document.add_paragraph()

# Each (color, underline) pair yields one link to the same address:
#   1. no color element and no underline override,
#   2. a custom color with the underline switched off.
for color, underline in ((None, True), ('FF8822', False)):
    hyperlink = add_hyperlink(p, 'http://www.google.com', 'Google', color, underline)

document.save('demo.docx')
######################################################################################################################################## | |
# https://github.com/python-openxml/python-docx/issues/720 | |
from docx.oxml.ns import qn # ---for "qualified name", aka. Clark-notation tagname--- | |
def remove_relationship_from_doc(doc, hyperlink):
    """Drop the relationship referenced by a ``w:hyperlink`` element.

    :param doc: The document whose part holds the relationship.
    :param hyperlink: The ``w:hyperlink`` element; its ``r:id`` attribute
        names the relationship to drop.
    :return: None
    """
    hyperlink_rel_id = hyperlink.get(qn("r:id"))
    if hyperlink_rel_id is None:
        # No r:id attribute (e.g. a link that only targets an internal
        # anchor), so there is no relationship to drop.
        return
    # Use the document passed in by the caller rather than whatever
    # module-level ``document`` happens to exist.
    # NOTE(review): drop_rel is expected to skip relationships that are still
    # referenced elsewhere -- confirm against the python-docx version in use.
    doc.part.drop_rel(hyperlink_rel_id)
def remove_hyperlink_from_paragraph(doc, paragraph, remove_label=True):
    """Remove the first hyperlink that is a direct child of a paragraph.

    :param doc: The document that owns the paragraph; passed on so the
        hyperlink's relationship can be dropped as well.
    :param paragraph: The paragraph containing the hyperlink.
    :param remove_label: When true the hyperlink is removed together with its
        label text. When false the hyperlink's runs are first moved out so
        the label stays in the paragraph as ordinary text.
    :return: None
    :raises IndexError: If the paragraph has no ``w:hyperlink`` child.
    """
    p = paragraph._p
    hyperlinks = p.xpath("./w:hyperlink")
    if not hyperlinks:
        raise IndexError("paragraph contains no w:hyperlink element")
    hyperlink = hyperlinks[0]

    if not remove_label:
        # Move every run of the first hyperlink in front of it. Each call
        # inserts directly before the hyperlink, so the runs keep their
        # original order. (lxml's method is ``addprevious``.)
        for label_r in p.xpath("./w:hyperlink[1]/w:r"):
            hyperlink.addprevious(label_r)

    p.remove(hyperlink)
    remove_relationship_from_doc(doc, hyperlink)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment