Skip to content

Instantly share code, notes, and snippets.

@TheLouisHong
Created July 3, 2015 09:31
Show Gist options
  • Save TheLouisHong/8dd5e7a6a9945d440d9e to your computer and use it in GitHub Desktop.
Save TheLouisHong/8dd5e7a6a9945d440d9e to your computer and use it in GitHub Desktop.
Reddit2HTML
import re
# Engine
class Reddit2HTMLConverter:
    """Bundle a pattern, a format string and the formatter that applies them.

    The converter contains no conversion logic of its own: ``convert`` hands
    the stored pattern and format string, together with the text, to the
    stored formatter callable and returns whatever that callable returns.
    """

    def __init__(self, regPattern, formatStr, formatter):
        """Store the three collaborators used by ``convert``.

        regPattern -- pattern object passed to the formatter as its first
                      argument.
        formatStr  -- format string passed to the formatter as its second
                      argument.
        formatter  -- callable invoked as
                      ``formatter(regPattern, formatStr, text)``.
        """
        self.regPattern = regPattern
        self.formatter = formatter
        self.formatStr = formatStr

    def __repr__(self):
        """Return a debugging representation listing the stored attributes."""
        return '{}({!r}, {!r}, {!r})'.format(
            type(self).__name__,
            self.regPattern,
            self.formatStr,
            self.formatter)

    def convert(self, text):
        """Return the result of running the formatter over ``text``."""
        return self.formatter(self.regPattern, self.formatStr, text)
# Utility
def flatten(seq):
    """Return a flat list of the items nested inside ``seq``.

    Lists and tuples (including their subclasses) are expanded recursively,
    depth first, preserving order.  Every other value -- strings included --
    is kept as a single item.

    seq -- an iterable whose elements may themselves be lists or tuples.
    """
    flat = []
    for item in seq:
        # isinstance rather than an exact type comparison, so subclasses of
        # list/tuple are expanded too instead of being silently kept nested.
        if isinstance(item, (tuple, list)):
            flat.extend(flatten(item))
        else:
            flat.append(item)
    return flat
# An example formatter function; write your own if you need custom behavior.
def REPLACE_EVERYTHING(regPattern, formatStr, text):
    """Replace every match of ``regPattern`` in ``text`` using ``formatStr``.

    For each non-overlapping match, the match's groups are passed
    positionally to ``formatStr.format`` and the formatted string takes the
    place of the matched span.  Text outside the matches is left untouched.

    regPattern -- compiled regular expression object.
    formatStr  -- ``str.format`` template whose positional fields refer to
                  the pattern's groups ({0} is the first group).
    text       -- the string to transform.

    Returns the transformed string; ``text`` unchanged if nothing matches.
    """
    def render(matched):
        return formatStr.format(*matched.groups())

    # A callable replacement is inserted verbatim (no backslash processing),
    # and sub() stitches the pieces together in a single pass instead of
    # re-slicing the whole string once per match.
    return regPattern.sub(render, text)
# Inline markup.  Each pattern captures the text between the markers
# non-greedily so that several spans on one line stay separate matches.
# The *Format strings are str.format templates fed with the pattern's groups.
linkPattern = re.compile(r'\[(.+?)\]\((.+?)\)')
linkFormat = r'<a href="{1}">{0}</a>'
boldPattern = re.compile(r'\*\*(.+?)\*\*')
boldFormat = r'<strong>{}</strong>'
italicPattern = re.compile(r'\*(.+?)\*')
italicFormat = r'<i>{}</i>'
# Non-greedy like the other inline patterns: a greedy capture would merge
# "***a*** and ***b***" into one span.
italicBoldPattern = re.compile(r'\*\*\*(.+?)\*\*\*')
italicBoldFormat = r'<i><strong>{}</strong></i>'
# TODO Make list formatter method
# dashListPattern = re.compile('(- .+?\\n)+(- .+?)')
# dashListFormat = ""
# listPattern = re.compile('(\\* .+?\\n)+(\\* .+?)')
# Headers.  The hashes must start a line ("(?:\n|^)" with MULTILINE) so a
# "#" in the middle of a line -- "issue #3", a URL fragment -- is not turned
# into a header, and "(?!#)" pins the exact level so "## x" is not picked up
# by the level-1 pattern.  The newline before and after the header line is
# consumed along with it.
header1Pattern = re.compile(r'(?:\n|^)#(?!#) *(.*)(?:\n)?', re.MULTILINE)
header1Format = "<h1>{}</h1>"
header2Pattern = re.compile(r'(?:\n|^)##(?!#) *(.*)(?:\n)?', re.MULTILINE)
header2Format = "<h2>{}</h2>"
header3Pattern = re.compile(r'(?:\n|^)###(?!#) *(.*)(?:\n)?', re.MULTILINE)
header3Format = "<h3>{}</h3>"
header4Pattern = re.compile(r'(?:\n|^)####(?!#) *(.*)(?:\n)?', re.MULTILINE)
header4Format = "<h4>{}</h4>"
# One named converter per kind of markup; all of them use the generic
# REPLACE_EVERYTHING formatter.
linkConverter = Reddit2HTMLConverter(
    linkPattern,
    linkFormat,
    REPLACE_EVERYTHING)
boldConverter = Reddit2HTMLConverter(
    boldPattern,
    boldFormat,
    REPLACE_EVERYTHING)
italicConverter = Reddit2HTMLConverter(
    italicPattern,
    italicFormat,
    REPLACE_EVERYTHING)
# Uses the italic+bold template; the plain italic template would drop the
# <strong> wrapper.
italicBoldConverter = Reddit2HTMLConverter(
    italicBoldPattern,
    italicBoldFormat,
    REPLACE_EVERYTHING)
header1Converter = Reddit2HTMLConverter(
    header1Pattern,
    header1Format,
    REPLACE_EVERYTHING)
header2Converter = Reddit2HTMLConverter(
    header2Pattern,
    header2Format,
    REPLACE_EVERYTHING)
header3Converter = Reddit2HTMLConverter(
    header3Pattern,
    header3Format,
    REPLACE_EVERYTHING)
header4Converter = Reddit2HTMLConverter(
    header4Pattern,
    header4Format,
    REPLACE_EVERYTHING)
# Converters are applied in list order, each one rewriting the output of the
# previous one, so the most specific markup has to come first: "***x***"
# before "**x**" before "*x*", and "####" before "###" before "##" before
# "#".  Otherwise the shorter marker consumes part of the longer one.
listOConverters = [
    linkConverter,
    italicBoldConverter,
    boldConverter,
    italicConverter,
    header4Converter,
    header3Converter,
    header2Converter,
    header1Converter,
]
if __name__ == "__main__":
    # Read the whole input file; the context manager closes the handle even
    # if reading fails.
    with open("input.txt", "r") as source:
        text = source.read()
    # Run every converter in list order, feeding each one the previous
    # converter's output.
    results = text
    for converter in listOConverters:
        results = converter.convert(results)
    # Closing the output file explicitly guarantees the data is flushed to
    # disk rather than relying on garbage collection at interpreter exit.
    with open("output.txt", "w") as output:
        output.write(results)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment