Skip to content

Instantly share code, notes, and snippets.

@sontek
Created December 28, 2014 23:53
Show Gist options
  • Save sontek/e58c1cf82931e1478cd2 to your computer and use it in GitHub Desktop.
Save sontek/e58c1cf82931e1478cd2 to your computer and use it in GitHub Desktop.
Check README for pypi
import sys
import StringIO
import urlparse
import cgi
from docutils import io, readers
from docutils.core import publish_doctree, Publisher
from docutils.transforms import TransformError
# URI schemes that links and images in a rendered description may use.
ALLOWED_SCHEMES = [
    'file', 'ftp', 'gopher', 'hdl', 'http', 'https', 'imap', 'mailto',
    'mms', 'news', 'nntp', 'prospero', 'rsync', 'rtsp', 'rtspu', 'sftp',
    'shttp', 'sip', 'sips', 'snews', 'svn', 'svn+ssh', 'telnet', 'wais',
    'irc',
]
def trim_docstring(text):
    """
    Trim indentation and blank lines from docstring text & return it.

    The first line is stripped on both sides.  Every following line has the
    smallest indentation found among the non-blank following lines removed,
    and its trailing whitespace stripped.  Leading and trailing blank lines
    are then dropped.  A false value (``None`` or ``''``) is returned as-is.

    See PEP 257.
    """
    if not text:
        return text
    # Convert tabs to spaces (following the normal Python rules)
    # and split into a list of lines:
    lines = text.expandtabs().splitlines()
    # Collect the indentation of every non-blank line after the first
    # (the first line doesn't count).  Using a list instead of a
    # ``sys.maxint`` sentinel keeps this working on Python 2 and Python 3.
    indents = []
    for line in lines[1:]:
        stripped = line.lstrip()
        if stripped:
            indents.append(len(line) - len(stripped))
    # Remove indentation (first line is special):
    trimmed = [lines[0].strip()]
    if indents:
        margin = min(indents)
        trimmed.extend(line[margin:].rstrip() for line in lines[1:])
    # Strip off trailing and leading blank lines:
    while trimmed and not trimmed[-1]:
        trimmed.pop()
    while trimmed and not trimmed[0]:
        trimmed.pop(0)
    # Return a single string:
    return '\n'.join(trimmed)
def processDescription(source, output_encoding='unicode'):
    """Given a source string, returns an HTML fragment as a string.

    The text is dedented with ``trim_docstring``, parsed as reStructuredText
    with Docutils, every ``refuri``/``uri`` attribute in the document tree is
    checked against ``ALLOWED_SCHEMES``, and the tree is then written out as
    HTML.  The return value is the contents of the <body> tag.

    If publishing fails (an exception is raised, Docutils exits, or anything
    was written to stderr while publishing), the HTML-escaped source wrapped
    in ``<PRE>`` is returned instead.

    Parameters:

    - `source`: A multi-line text string; required.
    - `output_encoding`: The desired encoding of the output.  If a Unicode
      string is desired, use the default value of "unicode" .
    """
    # Dedent all lines of `source`.
    source = trim_docstring(source)

    settings_overrides = {
        'raw_enabled': 0,  # no raw HTML code
        'file_insertion_enabled': 0,  # no file/URL access
        'halt_level': 2,  # at warnings or errors, raise an exception
        'report_level': 5,  # never report problems with the reST code
    }

    # capture publishing errors, they go to stderr
    old_stderr = sys.stderr
    sys.stderr = captured = StringIO.StringIO()
    parts = None
    try:
        # Convert reStructuredText to a document tree using Docutils.
        document = publish_doctree(
            source=source,
            settings_overrides=settings_overrides
        )

        # Refuse any link or image whose URI scheme is not whitelisted.
        for node in document.traverse():
            if node.tagname == '#text':
                continue
            if node.hasattr('refuri'):
                uri = node['refuri']
            elif node.hasattr('uri'):
                uri = node['uri']
            else:
                continue
            if urlparse.urlparse(uri).scheme not in ALLOWED_SCHEMES:
                raise TransformError('link scheme not allowed')

        # now turn the transformed document into HTML
        reader = readers.doctree.Reader(parser_name='null')
        pub = Publisher(
            reader,
            source=io.DocTreeInput(document),
            destination_class=io.StringOutput
        )
        pub.set_writer('html')
        pub.process_programmatic_settings(None, settings_overrides, None)
        pub.set_destination(None, None)
        pub.publish()
        parts = pub.writer.parts
    except (Exception, SystemExit):
        # Deliberate best effort: any failure falls through to the <PRE>
        # fallback below.  SystemExit is listed explicitly because Docutils'
        # Publisher.publish() can terminate via sys.exit() after reporting an
        # error; KeyboardInterrupt is intentionally left to propagate.
        pass
    finally:
        # Always put the real stderr back, even if something propagates.
        sys.stderr = old_stderr

    # original text if publishing errors occur
    if parts is None or captured.getvalue():
        output = '<PRE>\n' + cgi.escape(source) + '</PRE>'
    else:
        output = parts['body']

    if output_encoding != 'unicode':
        output = output.encode(output_encoding)
    return output
if __name__ == '__main__':
    # Script mode: read a description from standard input and print the
    # rendered HTML fragment to standard output.
    rendered = processDescription(sys.stdin.read())
    print(rendered)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment