Last active
August 29, 2015 13:57
-
-
Save ict4eo/9546725 to your computer and use it in GitHub Desktop.
Convert a document in Google Code wiki page format to reStructuredText (reST)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Convert a wiki page in Google Code wiki format to reStructuredText (reST).

Version 1.0.2
19 March 2014
Derek Hohls, Meraka, CSIR

Handles
=======
* internal comments (#name - no space)
* bold / italics text
* inline code
* code blocks
* internal cross-reference [[example]]
* bullets
* numbered list
* headers (up to 4 levels deep)
* tables
* standalone hyperlinks (these stay as is)

TODO
====
* indented bullets
* external hyperlinks with embedded link: `Python web site <http://www.python.org>`__
* named internal cross-reference: [[FutureModuleRoadmap | Future Roadmap]]
"""
import pprint | |
def process_header(txt):
    """Convert a Google Code wiki header line to a reST section title.

    The header level is the number of leading ``=`` characters
    (``= H1 =`` up to ``==== H4 ====``); deeper levels are treated as
    level 4.  Levels 1-4 are underlined with ``=``, ``-``, ``^`` and ``~``
    respectively.

    Args:
        txt: the wiki header line, e.g. ``'== Title ==\\n'``.

    Returns:
        A list of three strings, none of which contains a line ending:
        the title text, an underline exactly as long as the title, and a
        single-space spacer line.

    Raises:
        ValueError: if ``txt`` does not start with ``=``.
    """
    # One underline character per supported header level (1-4).
    underline_chars = '=-^~'

    line = txt.strip()
    # Level = number of leading '=' markers.
    level = len(line) - len(line.lstrip('='))
    if level == 0:
        raise ValueError('not a wiki header: %r' % txt)
    level = min(level, len(underline_chars))

    # Strip the markers from the ends only, so an '=' inside the title
    # survives; then drop the padding between markers and title.
    title = line.strip('=').strip()

    # reST requires the underline to be at least as long as the title, so
    # it is built from the title length rather than sliced from a
    # fixed-width string.
    underline = underline_chars[level - 1] * len(title)
    return [title, underline, ' ']
def process_table(rows):
    """Render Google Code wiki table rows as a reST grid table.

    Args:
        rows: sequence of wiki table lines such as ``'|| a || b ||\\n'``.

    Returns:
        A list of output lines without line endings: a ``+---+`` separator
        followed, for every input row, by the ``|``-delimited row and
        another separator.  An empty list is returned for empty input.
    """
    # Split every line into its cells, dropping the outer '||' markers.
    table = [
        row.strip('\n').strip(' ').strip('|').split('||')
        for row in rows
    ]
    if not table:
        return []

    # Size the grid from the widest row so that ragged input cannot fail;
    # short rows are padded with empty cells to keep the grid rectangular.
    n_cols = max(len(cells) for cells in table)
    widths = [0] * n_cols
    for cells in table:
        cells.extend([''] * (n_cols - len(cells)))
        for col, cell in enumerate(cells):
            widths[col] = max(widths[col], len(cell))

    separator = '+' + ''.join('-' * width + '+' for width in widths)

    results = [separator]
    for cells in table:
        line = '|' + ''.join(
            cell.ljust(width) + '|' for cell, width in zip(cells, widths)
        )
        results.append(line)
        results.append(separator)
    return results
def process_bullet(text, text_prior):
    """Return the bullet line unchanged (bullet handling is still a TODO).

    ``text_prior`` is accepted for the benefit of a future implementation
    but is not used at present.
    """
    unchanged = text
    return unchanged
def process(file_in, file_out=None):
    """Convert a Google Code wiki file to reStructuredText.

    Args:
        file_in: path of the wiki source file to read.
        file_out: path of the reST file to write.  When omitted (or empty)
            the converted text is printed to standard output instead.
    """
    # Read the whole input first; the file is closed as soon as we are done.
    with open(file_in) as inputfile:
        text_in = inputfile.readlines()

    text_out = []
    table_rows = []
    is_code = False
    txt_prior = ''

    def flush_table():
        # Emit any buffered table rows as a grid table, then reset the buffer.
        if table_rows:
            for row in process_table(table_rows):
                text_out.append('%s\n' % row)
            del table_rows[:]

    for txt in text_in:
        _txt = txt.strip(' ')
        is_table_row = not is_code and txt[:2] == '||'
        if not is_table_row:
            # A table ends at the first non-table line, whatever that line
            # is; flushing here keeps the table ahead of what follows it.
            flush_table()

        if is_code and txt[:3] != '}}}':  # inside a code block
            # Use the raw line so the code keeps its own indentation.
            text_out.append(' %s' % txt)
        elif txt[:3] == '{{{':  # code start
            is_code = True
            text_out.append('::\n')
            # reST requires a blank line between '::' and the literal block.
            text_out.append('\n')
        elif txt[:3] == '}}}':  # code end
            text_out.append('\n')
            is_code = False
        else:
            if _txt[:2] == '# ':  # numbered list
                _txt = _txt.replace('# ', '#. ')
            if txt.startswith('*'):  # bullets
                _txt = process_bullet(txt, txt_prior)
            _txt = _txt.replace('__', '**')  # bold / strong
            _txt = _txt.replace('_', '*')  # italics
            _txt = _txt.replace('`', '``')  # inline
            _txt = _txt.replace('{{', '``').replace('}}', '``')  # inline
            _txt = _txt.replace('[', '[[').replace(']', ']]')  # cross-ref

            if txt.startswith('#'):  # comment
                text_out.append('.. .. %s' % _txt)
            elif txt.startswith('='):  # header
                for head in process_header(_txt):
                    # Exactly one line ending per header line: a title must
                    # be directly followed by its underline.
                    text_out.append(head.rstrip('\n') + '\n')
            elif is_table_row:  # table: buffer until the table ends
                table_rows.append(_txt)
            else:
                text_out.append(_txt)
        txt_prior = txt

    # A table that runs to the end of the file has not been emitted yet.
    flush_table()

    if file_out:
        with open(file_out, 'w') as outputfile:
            outputfile.writelines(text_out)
    else:
        for text in text_out:
            # Parenthesised single-argument form works on Python 2 and 3.
            print(text.strip('\n'))
# Convert the sample wiki page and write the result to a reST file.
process(file_in='text.wiki', file_out='text.rst')
# process(file_in='text.wiki')  # alternative: print the result to screen
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment