Created
May 12, 2017 20:27
-
-
Save KasparNagu/bcc1bbf2087527d275bbbf8169982a5e to your computer and use it in GitHub Desktop.
Python parser and editor for wiki tables
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
import re | |
class BorderedTextList:
    """Ordered collection of bordered-text items, possibly nested.

    The wrapped Python list is exposed as ``lst``. Items need a ``text()``
    method for :meth:`text` and a ``split(pattern)`` method for :meth:`split`.
    """

    def __init__(self, lst):
        self.lst = lst

    def text(self):
        """Return the ``text()`` of every item, concatenated in order."""
        return "".join(item.text() for item in self.lst)

    def split(self, re):
        """Split each item on the pattern ``re`` and collect the results.

        Each item's ``split`` result becomes one element of the returned
        list, so splitting adds one level of nesting. Note that the parameter
        name shadows the ``re`` module inside this method.
        """
        pieces = []
        for item in self.lst:
            pieces.append(item.split(re))
        return BorderedTextList(pieces)

    def __repr__(self):
        rendered = [str(item) for item in self.lst]
        return "[" + ", ".join(rendered) + "]"

    def __getitem__(self, key):
        """Index with a plain key, or walk nested levels with a tuple key.

        ``lst[a, b]`` is evaluated as ``lst[a][b]``; every other kind of key
        is forwarded unchanged to the wrapped list.
        """
        if not isinstance(key, tuple):
            return self.lst[key]
        node = self.lst
        for step in key:
            node = node[step]
        return node

    def __iter__(self):
        return iter(self.lst)

    def __len__(self):
        return len(self.lst)
class BorderedText(object):
    """A piece of text together with the delimiters on either side of it.

    ``left`` and ``right`` hold whatever surrounds ``txt``, so that
    ``left + txt + right`` (see :meth:`text`) gives back the full span.
    """

    def __init__(self, txt, left="", right=""):
        self.txt = txt
        self.left = left
        self.right = right

    def split(self, regex):
        """Split ``txt`` on ``regex`` without losing the separators.

        Each separator match becomes the ``left`` border of the piece that
        follows it. The first piece inherits this object's ``left`` and the
        last piece inherits its ``right``, so concatenating ``text()`` of all
        pieces reproduces ``self.text()``.

        Args:
            regex: pattern source (a string) describing the separator. It may
                contain capturing groups of its own.

        Returns:
            A BorderedTextList of BorderedText pieces. If ``regex`` does not
            match, the list holds a single copy of this object.
        """
        # Wrapping the pattern in a group makes re.split return the matched
        # separators alongside the text between them.
        pattern = re.compile("(" + regex + ")")
        parts = pattern.split(self.txt)
        if len(parts) == 1:
            return BorderedTextList([BorderedText(self.txt, self.left, self.right)])

        # re.split emits one entry per capturing group for every separator:
        # the wrapper group first, then any groups inside ``regex``. Stepping
        # by the real group count keeps text and separators paired even when
        # the caller's pattern captures something.
        stride = pattern.groups + 1
        texts = parts[0::stride]
        separators = parts[1::stride]

        pieces = [BorderedText(texts[0], self.left)]
        pieces.extend(
            BorderedText(body, sep) for sep, body in zip(separators, texts[1:])
        )
        pieces[-1].right = self.right
        return BorderedTextList(pieces)

    def text(self):
        """Return the text including both borders."""
        return self.left + self.txt + self.right

    def __repr__(self):
        def nlrep(txt):
            # Show line breaks as visible escapes so the repr stays on one line.
            return txt.replace("\n", "\\n").replace("\r", "\\r")

        return '[left="%s" txt="%s" right="%s"]' % (
            nlrep(self.left),
            nlrep(self.txt),
            nlrep(self.right),
        )
class BorderedTextSpaned(BorderedText):
    """A BorderedText that also records which cell owns its content.

    ``spanParent`` is the object itself unless another cell is supplied.
    WikiTableEditor supplies one for the empty placeholder cells it inserts
    where a colspan/rowspan cell covers additional grid positions.
    """

    def __init__(self, txt, left="", right="", spanParent=None):
        super(BorderedTextSpaned, self).__init__(txt, left, right)
        # Identity test: "== None" would dispatch to a user-defined __eq__.
        self.spanParent = self if spanParent is None else spanParent
class WikiTableEditor:
    """Editable grid view over the source text of one wiki table.

    ``rows`` is a BorderedTextList with one BorderedTextList per table row.
    Entry 0 of a row is the text found before the row's first cell marker
    (what follows ``|-``); the remaining entries are the cells. Positions
    covered by a ``colspan``/``rowspan`` cell are filled with empty
    placeholder cells whose ``spanParent`` is the covering cell, so
    ``editor[row, col]`` addresses the grid position rather than the n-th
    cell in the markup.
    """

    def __init__(self, txt):
        self.parseTable(txt)

    @staticmethod
    def _spanCount(left, attribute):
        """Return the integer value of ``attribute="N"`` in ``left``, or 1."""
        m = re.search(attribute + '="?([0-9]+)', left)
        return int(m.group(1)) if m else 1

    @staticmethod
    def _moveDefinitionToLeft(cell):
        """Move a leading ``attributes |`` prefix from the text to the border."""
        parts = re.split(r"(\| *)", cell.txt, 1)
        # No "|" at all, or a "[" before the first "|" (e.g. a piped link
        # such as [[target|label]]): the text has no attribute prefix.
        if len(parts) == 1 or "[" in parts[0]:
            return cell
        return BorderedText(parts[2], cell.left + parts[0] + parts[1], cell.right)

    @classmethod
    def _fillColspans(cls, cells):
        """Convert cells to BorderedTextSpaned and pad colspans with placeholders."""
        out = []
        for cell in cells:
            spanned = BorderedTextSpaned(cell.txt, cell.left, cell.right)
            out.append(spanned)
            # Placeholders must point at the cell that is actually stored in
            # the grid; pointing at the pre-conversion object would make
            # writes through a placeholder invisible to text().
            for _ in range(cls._spanCount(spanned.left, "colspan") - 1):
                out.append(BorderedTextSpaned("", "", "", spanned))
        return BorderedTextList(out)

    def _fillRowspans(self):
        """Insert placeholders into the rows underneath every rowspan cell."""
        rows = self.rows
        for r, row in enumerate(rows):
            # Only later rows are modified, so this row's length is stable.
            for c in range(len(row)):
                cell = row[c]
                height = self._spanCount(cell.left, "rowspan")
                if height <= 1:
                    continue
                # A cell that also spans columns occupies that many
                # positions in each of the rows below it.
                width = max(self._spanCount(cell.left, "colspan"), 1)
                # Slicing clamps a rowspan that runs past the last row.
                for below in rows.lst[r + 1:r + height]:
                    fillers = [
                        BorderedTextSpaned("", "", "", cell) for _ in range(width)
                    ]
                    below.lst = below.lst[:c] + fillers + below.lst[c:]

    def parseTable(self, txt):
        """Rebuild ``self.rows`` from the table source ``txt``.

        The text is split into rows on ``|-`` lines, then into cells on
        line-leading ``|``/``!`` markers and inline ``||`` separators. Cell
        attribute prefixes are moved into the cell's left border, and
        placeholders are added for colspan and rowspan coverage.
        """
        # Raw strings: "\|" inside a normal string literal is an invalid
        # escape sequence. "\r" and "\n" are understood by the regex engine.
        by_row = BorderedText(txt).split(r"(?:^|\r\n|\n)\|- *")
        self.rows = by_row.split(r"(?:(?:^|\r\n|\n)[|!]| *\|\|) *")
        self.rows.lst = [
            self._fillColspans([self._moveDefinitionToLeft(cell) for cell in row])
            for row in self.rows
        ]
        self._fillRowspans()

    def __repr__(self):
        """Render the grid as text, one line per row, columns padded to align.

        Column widths are between 2 and 40 characters; longer cell text is
        truncated and line breaks inside cells are dropped.
        """
        widths = []
        for row in self.rows:
            if len(widths) < len(row):
                widths += (len(row) - len(widths)) * [2]
            for k, cell in enumerate(row):
                widths[k] = max(widths[k], min(len(cell.txt), 40))
        lines = []
        for row in self.rows:
            shown = [
                re.sub("[\n\r]", "", cell.txt)[:widths[k]].ljust(widths[k])
                for k, cell in enumerate(row)
            ]
            lines.append("I ".join(shown))
        return "\n".join(lines)

    def __getitem__(self, key):
        """Return a row for ``editor[row]`` or cell text for ``editor[row, col]``.

        For a cell, the text of its ``spanParent`` is returned, so every
        position covered by a spanning cell reads as that cell's text.
        """
        e = self.rows[key]
        if isinstance(e, BorderedText):
            return e.spanParent.txt
        return e

    def __setitem__(self, key, txt):
        """Set the text of the cell that owns grid position ``key``."""
        self.rows[key].spanParent.txt = txt

    def text(self):
        """Return the table source, including any edits made so far."""
        return self.rows.text()

    def __len__(self):
        return len(self.rows)

    def insertRow(self, before, numCols=-1):
        """Insert an empty row in front of row index ``before``.

        Args:
            before: index the new row will occupy.
            numCols: number of entries in the new row, counting the leading
                row-separator entry. The default -1 means the length of the
                longest existing row.
        """
        if numCols == -1:
            numCols = max(len(row) for row in self.rows)
        # Entry 0 carries the "|-" row separator; the rest are empty cells.
        newRow = [BorderedTextSpaned("", "\n|-")]
        newRow.extend(BorderedTextSpaned("", "\n| ") for _ in range(1, numCols))
        self.rows = BorderedTextList(
            self.rows.lst[:before] + [BorderedTextList(newRow)] + self.rows.lst[before:]
        )
if __name__ == "__main__":
    # Command-line use: FILE [ROW/COL ...]
    # Prints every {| ... |} table found in FILE, followed by the text and
    # borders of each requested ROW/COL position of that table.
    import sys

    if len(sys.argv) < 2:
        sys.exit("usage: %s FILE [ROW/COL ...]" % sys.argv[0])

    # Read as bytes and decode explicitly; the with-block closes the file.
    with open(sys.argv[1], "rb") as f:
        txt = f.read().decode("iso-8859-1")

    def printable(txt):
        # Keep each report on one line by showing line breaks as escapes.
        return txt.replace("\n", "\\n").replace("\r", "\\r")

    # Non-greedy match with DOTALL so each table body may span many lines.
    for m in re.findall(r"\{\|(.*?)\|\}", txt, re.DOTALL):
        e = WikiTableEditor(m)
        print(e)
        for i in sys.argv[2:]:
            row, col = i.split("/")
            row = int(row)
            col = int(col)
            print("row=%d col=%d val='%s' left='%s' right='%s'" % (row,col,e[row,col],printable(e.rows[row,col].left),printable(e.rows[row,col].right)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment