# Source: GitHub Gist by @Triavanicus (Triavanicus/7d281c7e7db3efe654d6).
# Forked from gasman/gfm.py; last active July 14, 2019 06:42.
import re
from hashlib import md5
from markdown import markdown
# Markdown only renders a hard line break (<br />) for a line that ends in
# two spaces; spelled out explicitly so the count cannot be lost in editing.
_HARD_BREAK = " " * 2 + "\n"

_PRE_BLOCK_RE = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
_PRE_PLACEHOLDER_RE = re.compile(r'\{gfm-extraction-([0-9a-f]{32})\}')
_UNDERSCORE_WORD_RE = re.compile(
    r'^(?! {4}|\t).*\w+(?<!_)_\w+_\w[\w_]*', re.MULTILINE | re.UNICODE)
_SINGLE_NEWLINE_RE = re.compile(
    r'^[\w\<][^\n]*(\n+)', re.MULTILINE | re.UNICODE)
# The lookbehind skips URLs that already sit inside an attribute value.
_BARE_URL_RE = re.compile(r'(?<!")((?:https?|git|ftp)://[^\s<>"]+)', re.UNICODE)
# Non-greedy so that two marked spans on one line stay two spans.
_DEL_RE = re.compile(r'~~(.+?)~~', re.UNICODE)
_SUP_RE = re.compile(r'\^\((.+?)\)', re.UNICODE)
_MARK_RE = re.compile(r'==(.+?)==', re.UNICODE)
_PLAIN_TABLE_RE = re.compile(
    r'^ *(\S.*\|.*)<br />\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*',
    re.MULTILINE | re.UNICODE)
# NOTE: neither piped pattern checks that the second row is made of dashes;
# any second row is consumed as the alignment row.
_PIPED_TABLE_RES = (
    re.compile(
        r'^<p>\| *(\S.*\|.*) *\| *\n *\| *(\S.*\|.*) *\| *\n *((?:.*\|.*(?:\n|$))*)\n*',
        re.MULTILINE | re.UNICODE),
    re.compile(
        r'^ *\| *(\S.*\|.*) *\| *\n *\| *(\S.*\|.*) *\| *\n *((?:.*\|.*(?:\n|$))*)\n*',
        re.MULTILINE | re.UNICODE),
)


def _escape_underscores(match):
    """Backslash-escape every underscore in a match holding a multi-underscore word."""
    text = match.group(0)
    if text.count('_') >= 2:
        return text.replace('_', '\\_')
    return text


def _hard_break(match):
    """Turn a line followed by exactly one newline into a Markdown hard break."""
    if len(match.group(1)) == 1:
        return match.group(0).rstrip() + _HARD_BREAK
    return match.group(0)


def _link_bare_url(match):
    """Wrap a bare URL in an anchor that opens in a new tab."""
    url = match.group(1)
    # Text directly followed by </a> is already the body of a link; wrapping
    # it again would nest anchors.
    if match.string.startswith('</a>', match.end()):
        return url
    # Sentence punctuation glued to the URL is not part of the address.
    target = url.rstrip('.,!?')
    trailing = url[len(target):]
    return '<a href="%s" target="_blank">%s</a>%s' % (target, target, trailing)


def _alignment_attr(specs, index):
    """Return the ``align`` attribute for column *index*, or '' if there is none.

    *specs* holds the delimiter-row cells (e.g. ``:---:``).  A column with no
    matching delimiter cell simply gets no alignment instead of raising.
    """
    if index >= len(specs):
        return ''
    spec = specs[index]
    if re.match(r':-+:', spec):
        return ' align="center"'
    if re.match(r':-+', spec):
        return ' align="left"'
    if re.match(r'-+:', spec):
        return ' align="right"'
    return ''


def _strip_outer_pipes(row):
    """Drop surrounding whitespace and at most one leading/trailing pipe."""
    row = row.strip()
    if row.startswith('|'):
        row = row[1:]
    if row.endswith('|'):
        row = row[:-1]
    return row


def _render_table(match, piped):
    """Build the HTML table for one table match.

    Group 1 of *match* is the header row, group 2 the delimiter/alignment row
    and group 3 the newline-separated body rows.  *piped* selects the layout
    whose rows are wrapped in outer pipes; that layout also puts each table
    row on its own output line.
    """
    sep = '\n' if piped else ''
    specs = [
        spec.replace('<br />', '').replace(' ', '')
        for spec in match.group(2).split('|')
    ]

    parts = ['<table>', sep, '<tr>']
    for index, header in enumerate(match.group(1).split('|')):
        text = header.replace('<p>', '').replace('</p>', '')
        parts.append('<th%s>%s</th>' % (_alignment_attr(specs, index), text))
    parts.append('</tr>' + sep)

    for row in match.group(3).split('\n'):
        if not row:
            continue
        # Remove the paragraph/line-break markup markdown() wrapped around
        # the raw table text before splitting it into cells.
        row = row.replace('<p>', '').replace('</p>', '').replace('<br />', '')
        if piped:
            row = _strip_outer_pipes(row)
        parts.append('<tr>')
        for index, cell in enumerate(row.split('|')):
            parts.append('<td%s>%s</td>' % (_alignment_attr(specs, index), cell))
        parts.append('</tr>' + sep)

    parts.append('</table>')
    return ''.join(parts)


def gfm(value):
    """Render GitHub-flavoured Markdown text to HTML.

    The text is pre-processed, passed through ``markdown()`` (imported at the
    top of this file), and the resulting HTML is post-processed:

    1. ``<pre>`` blocks are swapped for placeholders so the next two steps
       cannot touch them, then swapped back before rendering.
    2. Underscores in words containing two or more of them are escaped so
       ``foo_bar_baz`` does not gain an italic middle.
    3. A line that starts with a word character or ``<`` and is followed by
       exactly one newline gets a Markdown hard break.
    4. After rendering: bare http/https/git/ftp URLs become links,
       ``~~x~~`` becomes ``<del>``, ``^(x)`` becomes ``<sup>``, ``==x==``
       becomes ``<mark>``, and pipe tables become ``<table>`` markup.

    Args:
        value: Markdown source text.

    Returns:
        The rendered HTML string.
    """
    extractions = {}

    def stash_pre(match):
        block = match.group(0)
        digest = md5(block.encode('utf-8')).hexdigest()
        extractions[digest] = block
        return "{gfm-extraction-%s}" % digest

    def restore_pre(match):
        block = extractions.get(match.group(1))
        if block is None:
            # Placeholder-looking text that was part of the input itself.
            return match.group(0)
        return '\n\n' + block

    # Pre-processing of the Markdown source.
    value = _PRE_BLOCK_RE.sub(stash_pre, value)
    value = _UNDERSCORE_WORD_RE.sub(_escape_underscores, value)
    value = _SINGLE_NEWLINE_RE.sub(_hard_break, value)
    value = _PRE_PLACEHOLDER_RE.sub(restore_pre, value)

    value = markdown(value)

    # Post-processing of the rendered HTML.
    value = _BARE_URL_RE.sub(_link_bare_url, value)
    value = _DEL_RE.sub(r'<del>\1</del>', value)
    value = _SUP_RE.sub(r'<sup>\1</sup>', value)
    value = _MARK_RE.sub(r'<mark>\1</mark>', value)
    value = _PLAIN_TABLE_RE.sub(lambda m: _render_table(m, False), value)
    for pattern in _PIPED_TABLE_RES:
        value = pattern.sub(lambda m: _render_table(m, True), value)
    return value
# Test suite.
try:
    from nose.tools import assert_equal
except ImportError:
    def assert_equal(a, b):
        """Fallback used when nose is unavailable: fail unless ``a == b``.

        Raises AssertionError with a ``'<a> != <b>'`` message.  The error is
        raised explicitly rather than with an ``assert`` statement, because
        ``assert`` is removed under ``python -O`` and every test would then
        pass without checking anything.
        """
        if not a == b:
            raise AssertionError('%r != %r' % (a, b))
def test_single_underscores():
    """A word with a single underscore must pass through untouched."""
    # NOTE(review): the expected text carries no HTML, yet gfm() in this file
    # also runs its input through markdown() -- confirm the expectation.
    source = 'foo_bar'
    expected = 'foo_bar'
    assert_equal(gfm(source), expected)
def test_underscores_code_blocks():
    """Underscores inside an indented code block must not be escaped."""
    # NOTE(review): gfm() only exempts lines starting with four spaces or a
    # tab; the literal below starts with one space -- confirm the indentation
    # was not collapsed.  The expectation also predates the markdown() call.
    source = ' foo_bar_baz'
    expected = ' foo_bar_baz'
    assert_equal(gfm(source), expected)
def test_underscores_pre_blocks():
    """Underscores inside a <pre> block must not be escaped."""
    # NOTE(review): expectation assumes no markdown() rendering -- confirm.
    source = '<pre>\nfoo_bar_baz\n</pre>'
    # A re-inserted pre block is prefixed with two newlines.
    expected = '\n\n' + source
    assert_equal(gfm(source), expected)
def test_pre_block_pre_text():
    """A pre block renders the same whether or not text precedes it."""
    pre_block = '<pre>\nthis is `a\\_test` and this\\_too\n</pre>'
    blank_prefixed = gfm('\n\n' + pre_block)
    text_prefixed = gfm('hmm' + pre_block)
    # Skip the differing prefixes (2 and 3 characters) before comparing.
    # NOTE(review): these offsets assume the output starts with the raw
    # prefix; gfm() also applies markdown() -- confirm they still line up.
    assert_equal(blank_prefixed[2:], text_prefixed[3:])
def test_two_underscores():
    """Words holding two or more underscores get every underscore escaped."""
    # NOTE(review): expectations assume no markdown() rendering -- confirm.
    cases = (
        ('foo_bar_baz', 'foo\\_bar\\_baz'),
        ('something else then foo_bar_baz',
         'something else then foo\\_bar\\_baz'),
    )
    for source, expected in cases:
        assert_equal(gfm(source), expected)
def test_newlines_simple():
    """A single newline between two plain lines becomes a line break."""
    # NOTE(review): expectation assumes no markdown() rendering -- confirm.
    source = 'foo\nbar'
    expected = 'foo \nbar'
    assert_equal(gfm(source), expected)
def test_newlines_group():
    """Line breaks are added inside every paragraph, not just the first."""
    # NOTE(review): expectation assumes no markdown() rendering -- confirm.
    source = 'apple\npear\norange\n\nruby\npython\nerlang'
    expected = 'apple \npear \norange\n\nruby \npython \nerlang'
    assert_equal(gfm(source), expected)
def test_newlines_long_group():
    """Line breaks are added throughout paragraphs of more than three lines."""
    # NOTE(review): expectation assumes no markdown() rendering -- confirm.
    source = 'apple\npear\norange\nbanana\n\nruby\npython\nerlang'
    expected = 'apple \npear \norange \nbanana\n\nruby \npython \nerlang'
    assert_equal(gfm(source), expected)
def test_newlines_list():
    """Lines that start with a list marker get no line break added."""
    # NOTE(review): expectations assume no markdown() rendering -- confirm.
    for source in ('# foo\n# bar', '* foo\n* bar'):
        # The text is expected back unchanged.
        assert_equal(gfm(source), source)
# GitHub page footer: "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment"