-
-
Save Triavanicus/7d281c7e7db3efe654d6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
from hashlib import md5 | |
from markdown import markdown | |
# Regular expressions used by gfm(); compiled once at import time instead of
# on every call.
_PRE_BLOCK_RE = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
_PRE_PLACEHOLDER_RE = re.compile(r'\{gfm-extraction-([0-9a-f]{32})\}')
_MULTI_UNDERSCORE_RE = re.compile(
    r'^(?! {4}|\t).*\w+(?<!_)_\w+_\w[\w_]*', re.MULTILINE | re.UNICODE)
_SOFT_NEWLINE_RE = re.compile(r'^[\w\<][^\n]*(\n+)', re.MULTILINE | re.UNICODE)
# A bare URL that is not already inside a double-quoted attribute.  The body
# accepts digits and common URL punctuation; '=' is accepted only when it is
# not doubled so that a trailing '==' (highlight marker) is left alone, and
# '~' is excluded so that a trailing '~~' (strikethrough marker) is too.
_LINK_RE = re.compile(
    r'((?<!")(?:https?|git|ftp)://(?:[a-zA-Z0-9./&%#?:;@+_-]|=(?!=))*)',
    re.MULTILINE | re.UNICODE)
# Non-greedy so that two spans on the same line stay two spans.
_DEL_RE = re.compile(r'~~(.*?)~~', re.MULTILINE | re.UNICODE)
_MARK_RE = re.compile(r'==(.*?)==', re.MULTILINE | re.UNICODE)
# Left greedy on purpose: a lazy match would cut "^(f(x))" at the first ')'.
_SUP_RE = re.compile(r'\^\((.*)\)', re.MULTILINE | re.UNICODE)
_TABLE_RE = re.compile(
    r'^ *(\S.*\|.*)<br />\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*',
    re.MULTILINE | re.UNICODE)
# Piped tables are matched twice: first when they open a paragraph, then
# anywhere else at the start of a line.
_PIPED_TABLE_RES = (
    re.compile(
        r'^<p>\| *(\S.*\|.*) *\| *\n *\| *(\S.*\|.*) *\| *\n *((?:.*\|.*(?:\n|$))*)\n*',
        re.MULTILINE | re.UNICODE),
    re.compile(
        r'^ *\| *(\S.*\|.*) *\| *\n *\| *(\S.*\|.*) *\| *\n *((?:.*\|.*(?:\n|$))*)\n*',
        re.MULTILINE | re.UNICODE),
)
# Checked in order with re.match semantics, so ':---:' must come before ':---'.
_ALIGN_RULES = (
    (re.compile(r'(\:\-+\:)'), 'center'),
    (re.compile(r'\:\-+'), 'left'),
    (re.compile(r'\-+\:'), 'right'),
)


def _escape_underscores(matchobj):
    """Backslash-escape every underscore in a match holding two or more."""
    text = matchobj.group(0)
    if text.count('_') >= 2:
        return text.replace('_', '\\_')
    return text


def _hard_break(matchobj):
    """Turn a line followed by exactly one newline into a Markdown hard break."""
    if len(matchobj.group(1)) == 1:
        # Markdown only emits <br /> for two (or more) trailing spaces.
        return matchobj.group(0).rstrip() + '  \n'
    return matchobj.group(0)


def _strip_paragraph_tags(text):
    """Remove the <p>/</p> tags that markdown() wrapped around table text."""
    return text.replace("<p>", '').replace("</p>", '')


def _clean_cell(text):
    """Return cell text without paragraph/line-break tags or padding."""
    return _strip_paragraph_tags(text).replace("<br />", '').strip()


def _parse_alignment(delimiter_row):
    """Split a delimiter row such as ':-- | --:' into per-column specs."""
    return [
        spec.replace("<br />", '').replace(" ", '')
        for spec in delimiter_row.split("|")
    ]


def _align_attr(align, index):
    """Return ' align="..."' for column ``index``, or '' if none applies.

    Columns beyond the end of ``align`` simply get no attribute, so a row
    with more cells than the delimiter row cannot raise IndexError.
    """
    if index >= len(align):
        return ''
    for rule, name in _ALIGN_RULES:
        if rule.match(align[index]):
            return ' align="%s"' % name
    return ''


def _render_table(headers, align, rows, opening, row_end):
    """Assemble the <table> markup shared by both table flavours."""
    parts = [opening]
    for index, header in enumerate(headers):
        parts.append("<th%s>%s</th>" % (_align_attr(align, index), header))
    parts.append(row_end)
    for row in rows:
        parts.append("<tr>")
        for index, cell in enumerate(row):
            parts.append("<td%s>%s</td>" % (_align_attr(align, index), cell))
        parts.append(row_end)
    parts.append("</table>")
    return "".join(parts)


def _table_callback(matchobj):
    """Render a table written without leading/trailing pipes."""
    headers = [
        _strip_paragraph_tags(header).strip()
        for header in matchobj.group(1).split("|")
    ]
    align = _parse_alignment(matchobj.group(2))
    rows = [
        [_clean_cell(cell) for cell in line.split("|")]
        for line in matchobj.group(3).split("\n")
        if line
    ]
    return _render_table(headers, align, rows, "<table><tr>", "</tr>")


def _piped_table_callback(matchobj):
    """Render a table whose rows are wrapped in leading/trailing pipes."""
    headers = [
        _strip_paragraph_tags(header).strip()
        for header in matchobj.group(1).split("|")
    ]
    align = _parse_alignment(matchobj.group(2))
    rows = []
    for line in matchobj.group(3).split("\n"):
        if not line:
            continue
        line = _strip_paragraph_tags(line).replace("<br />", '').strip()
        # Drop the enclosing pipes only when they are really there; slicing
        # blindly would eat the last cell of a row without a trailing pipe.
        if line.startswith("|"):
            line = line[1:]
        if line.endswith("|"):
            line = line[:-1]
        rows.append([_clean_cell(cell) for cell in line.split("|")])
    return _render_table(headers, align, rows, "<table>\n<tr>", "</tr>\n")


def gfm(value):
    """Convert GitHub-flavoured Markdown text to HTML.

    The text is pre-processed, handed to ``markdown()`` (the module-level
    import; presumably Python-Markdown -- confirm), and the resulting HTML is
    then post-processed for the extensions plain Markdown does not cover.

    Before ``markdown()``:
      * ``<pre>...</pre>`` blocks are swapped for placeholders so the next
        two steps cannot touch them, then restored (prefixed by a blank line).
      * On lines that are not indented code, spans containing words with two
        or more underscores get every underscore backslash-escaped.
      * A line starting with a word character or ``<`` and followed by exactly
        one newline gets two trailing spaces, i.e. a Markdown hard break.

    After ``markdown()``:
      * bare http/https/git/ftp URLs become ``<a target="_blank">`` links;
      * ``~~x~~`` -> ``<del>``, ``^(x)`` -> ``<sup>``, ``==x==`` -> ``<mark>``;
      * pipe tables (with or without enclosing pipes) become ``<table>``
        markup, honouring ``:--``, ``--:`` and ``:-:`` column alignment.

    Args:
        value: The source text (a ``str``).

    Returns:
        The rendered HTML as a ``str``.
    """
    # Extract pre blocks.
    extractions = {}

    def pre_extraction_callback(matchobj):
        block = matchobj.group(0)
        digest = md5(block.encode('utf-8')).hexdigest()
        extractions[digest] = block
        return "{gfm-extraction-%s}" % digest

    def pre_insert_callback(matchobj):
        # Text that merely looks like a placeholder is left as it was typed
        # instead of raising KeyError.
        if matchobj.group(1) not in extractions:
            return matchobj.group(0)
        return '\n\n' + extractions[matchobj.group(1)]

    value = _PRE_BLOCK_RE.sub(pre_extraction_callback, value)

    # Prevent foo_bar_baz from ending up with an italic word in the middle.
    value = _MULTI_UNDERSCORE_RE.sub(_escape_underscores, value)

    # In very clear cases, let newlines become <br /> tags.
    value = _SOFT_NEWLINE_RE.sub(_hard_break, value)

    # Insert pre block extractions.
    value = _PRE_PLACEHOLDER_RE.sub(pre_insert_callback, value)

    value = markdown(value)

    # All other gfm things; these operate on the generated HTML.
    value = _LINK_RE.sub(r'<a href="\1" target="_blank">\1</a>', value)
    value = _DEL_RE.sub(r'<del>\1</del>', value)
    value = _SUP_RE.sub(r'<sup>\1</sup>', value)
    value = _MARK_RE.sub(r'<mark>\1</mark>', value)
    value = _TABLE_RE.sub(_table_callback, value)
    for piped_table_re in _PIPED_TABLE_RES:
        value = piped_table_re.sub(_piped_table_callback, value)
    return value
# Test suite.
# NOTE(review): the expected strings in the tests below contain no HTML, yet
# gfm() passes its text through markdown() before returning -- confirm these
# tests still describe the current gfm() output.
try:
    from nose.tools import assert_equal
except ImportError:
    # nose is unavailable: fall back to a minimal stand-in with the same
    # two-argument call shape.
    def assert_equal(a, b):
        """Assert that ``a == b``, showing both reprs when they differ."""
        assert a == b, '%r != %r' % (a, b)
def test_single_underscores():
    """A word containing a single underscore is left unescaped."""
    rendered = gfm('foo_bar')
    assert_equal(rendered, 'foo_bar')
def test_underscores_code_blocks():
    """Underscores inside an indented code block are left unescaped."""
    # NOTE(review): gfm() only skips lines starting with four spaces or a
    # tab, but this input has a single leading space -- confirm whether the
    # indentation of the literal was lost.
    rendered = gfm(' foo_bar_baz')
    assert_equal(rendered, ' foo_bar_baz')
def test_underscores_pre_blocks():
    """Underscores inside a <pre> block are left unescaped."""
    source = '<pre>\nfoo_bar_baz\n</pre>'
    # The block is expected back unchanged apart from a leading blank line.
    expected = '\n\n<pre>\nfoo_bar_baz\n</pre>'
    assert_equal(gfm(source), expected)
def test_pre_block_pre_text():
    """A <pre> block renders the same whether or not text precedes it."""
    pre_body = '<pre>\nthis is `a\\_test` and this\\_too\n</pre>'
    after_blank_lines = gfm('\n\n' + pre_body)
    after_text = gfm('hmm' + pre_body)
    # Skip the differing prefixes (two newlines vs. "hmm") before comparing.
    assert_equal(after_blank_lines[2:], after_text[3:])
def test_two_underscores():
    """Words with two or more underscores get every underscore escaped."""
    cases = (
        ('foo_bar_baz', 'foo\\_bar\\_baz'),
        ('something else then foo_bar_baz',
         'something else then foo\\_bar\\_baz'),
    )
    for source, expected in cases:
        assert_equal(gfm(source), expected)
def test_newlines_simple():
    """A single newline between two text lines becomes a line break."""
    # NOTE(review): a Markdown hard break needs two trailing spaces; the
    # expected value here shows one -- confirm the literal is intact.
    rendered = gfm('foo\nbar')
    assert_equal(rendered, 'foo \nbar')
def test_newlines_group():
    """Line breaks are added inside every paragraph, not across them."""
    source = 'apple\npear\norange\n\nruby\npython\nerlang'
    expected = 'apple \npear \norange\n\nruby \npython \nerlang'
    assert_equal(gfm(source), expected)
def test_newlines_long_group():
    """Line breaks are added in paragraphs of more than three lines too."""
    source = 'apple\npear\norange\nbanana\n\nruby\npython\nerlang'
    expected = 'apple \npear \norange \nbanana\n\nruby \npython \nerlang'
    assert_equal(gfm(source), expected)
def test_newlines_list():
    """Lines starting with a '#' or '*' marker get no line break added."""
    for source in ('# foo\n# bar', '* foo\n* bar'):
        # Each input is expected back exactly as given.
        assert_equal(gfm(source), source)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment