Created
March 2, 2015 20:36
-
-
Save scanny/4476085aa5f57b8b4fc0 to your computer and use it in GitHub Desktop.
Code to translate reStructuredText into a Microsoft Word document using python-docx
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# encoding: utf-8 | |
""" | |
Helper objects for rendering to .docx format. | |
""" | |
from __future__ import ( | |
absolute_import, division, print_function, unicode_literals | |
) | |
from docutils import core | |
from lxml import etree | |
class RstRenderer(object):
    """
    Service class that knows how to render a RestructuredText string to
    a python-docx Document object.

    *blkcntnr* is the object paragraphs are added to; it must provide an
    `add_paragraph(style=...)` method whose return value provides
    `add_run(text, style)`. *rst* is the RestructuredText source, or |None|
    to render nothing. *style_overrides* is an optional dict that replaces
    entries in the default style-name lookup (keys 'h1', 'p', 'li', 'lc',
    'b' and 'i').
    """

    # Maps the tag of a supported inline element to its key in `_styles`.
    _INLINE_STYLE_KEYS = {'strong': 'b', 'emphasis': 'i'}

    def __init__(self, blkcntnr, rst, style_overrides=None):
        self._blkcntnr = blkcntnr
        self._rst = rst
        # A `None` default avoids sharing one dict object between all
        # instances created without explicit overrides.
        self._style_overrides = (
            {} if style_overrides is None else style_overrides
        )

    def render(self):
        """
        Parse the RestructuredText in *rst* and render it into *blkcntnr* as
        paragraphs, bullets, etc., including recognizing and rendering bold
        and italic runs within block elements.
        """
        self._render_container(self._rst_etree)

    @property
    def _styles(self):
        """
        The dict providing lookup for style names for this RST document.
        Built on first access from the defaults below, updated with the
        style overrides, then cached on the instance.
        """
        if not hasattr(self, '_styles_'):
            self._styles_ = {
                'h1': 'Heading 1',
                'p': 'Body Text',
                'li': 'List Bullet',
                'lc': 'List Continue',
                'b': 'Strong',
                'i': 'Emphasis',
            }
            self._styles_.update(self._style_overrides)
        return self._styles_

    def _render_container(self, container):
        """
        Render each element in *container* in turn. `section` elements are
        descended into recursively. Raises |NotImplementedError| when an
        element with an unsupported tag is encountered.
        """
        for element in container:
            tag = element.tag
            if tag == 'section':
                self._render_container(element)
            elif tag == 'title':
                self._render_paragraph(element, self._styles['h1'])
            elif tag == 'paragraph':
                self._render_paragraph(element, self._styles['p'])
            elif tag == 'bullet_list':
                self._render_bullet_list(element)
            else:
                raise NotImplementedError('unrecognized tag %s' % tag)

    @property
    def _rst_etree(self):
        """
        Return the root element of a RestructuredText XML document produced by
        converting *rst* to XML and then parsing that XML using lxml. Each
        newline in element text and tail text is replaced with a space.
        """
        def normalize_whitespace(elm):
            if elm.text is not None:
                elm.text = elm.text.replace('\n', ' ')
            for child in elm:
                normalize_whitespace(child)
            if elm.tail is not None:
                elm.tail = elm.tail.replace('\n', ' ')

        root_element = etree.fromstring(self._rst_xml)
        normalize_whitespace(root_element)
        return root_element

    @property
    def _rst_xml(self):
        """
        Bytes containing XML corresponding to the RestructuredText in *rst*.
        The XML vocabulary is a simple one using tags like `paragraph` and
        `strong`. An empty `document` element is returned when *rst* is
        |None|.
        """
        if self._rst is None:
            # A bytes literal keeps this branch consistent with the
            # documented return type.
            return b'<document/>'
        return core.publish_string(source=self._rst, writer_name='xml')

    def _render_bullet_list(self, bullet_list):
        """
        Add a bullet to *blkcntnr* for each list item in *bullet_list*. The
        first child of a list item gets the 'li' style; any further children
        get the 'lc' (list continuation) style.
        """
        def render_list_item(list_item):
            for idx, para in enumerate(list_item):
                style_key = 'li' if idx == 0 else 'lc'
                self._render_paragraph(para, self._styles[style_key])

        for list_item in bullet_list:
            render_list_item(list_item)

    def _render_paragraph(self, para, style):
        """
        Add a new paragraph to *blkcntnr* containing the content in the
        `paragraph` element *para*. Create appropriate runs for text having
        strong and emphasis inline formatting. Text of any other inline
        element is added as an unstyled run rather than failing on a style
        lookup. Only the direct text of each inline element is rendered.
        """
        paragraph = self._blkcntnr.add_paragraph(style=style)
        if para.text is not None:
            paragraph.add_run(para.text)
        for child in para:
            if child.text is not None:
                style_key = self._INLINE_STYLE_KEYS.get(child.tag)
                if style_key is None:
                    # No character style is defined for this inline tag;
                    # keep its text but leave the run unstyled.
                    paragraph.add_run(child.text)
                else:
                    paragraph.add_run(child.text, self._styles[style_key])
            # The tail is the plain text between this inline element and
            # the next one (or the end of the paragraph).
            if child.tail is not None:
                paragraph.add_run(child.tail)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment