Skip to content

Instantly share code, notes, and snippets.

@KasparNagu
Created May 12, 2017 20:27
Show Gist options
  • Save KasparNagu/bcc1bbf2087527d275bbbf8169982a5e to your computer and use it in GitHub Desktop.
Save KasparNagu/bcc1bbf2087527d275bbbf8169982a5e to your computer and use it in GitHub Desktop.
Python parser and editor for wiki tables
#!/usr/bin/python
import re
class BorderedTextList(object):
    """Ordered container of bordered-text pieces (or of nested lists of them).

    Every item is expected to provide ``text()`` and ``split(regex)``; both
    operations are simply forwarded to each item in order.  The wrapped
    Python list is exposed as ``self.lst`` and may be replaced by callers.
    """

    def __init__(self, lst):
        """Wrap *lst* (stored by reference, not copied)."""
        self.lst = lst

    def text(self):
        """Return the concatenation of ``text()`` of every item."""
        return "".join(item.text() for item in self.lst)

    def split(self, re):
        """Split every item on the regex string *re*.

        Returns a new ``BorderedTextList`` holding the result of
        ``item.split(re)`` for each item, i.e. one more nesting level.

        The parameter name shadows the ``re`` module inside this method; it
        is kept unchanged so existing keyword callers keep working.
        """
        return BorderedTextList([item.split(re) for item in self.lst])

    def __repr__(self):
        return "[" + ", ".join(str(item) for item in self.lst) + "]"

    def __getitem__(self, key):
        """Index the list; a tuple key walks nested levels.

        ``lst[i]`` returns item *i*.  ``lst[i, j, ...]`` indexes the wrapped
        list with ``i``, the result with ``j``, and so on.
        """
        if not isinstance(key, tuple):
            return self.lst[key]
        current = self.lst
        for index in key:
            current = current[index]
        return current

    def __iter__(self):
        return iter(self.lst)

    def __len__(self):
        return len(self.lst)
class BorderedText(object):
    """A piece of text together with the delimiters that surround it.

    ``left`` and ``right`` hold the raw delimiter text that preceded and
    followed ``txt`` in the source, so ``text()`` reproduces the original
    string exactly even after ``txt`` has been edited.
    """

    def __init__(self, txt, left="", right=""):
        self.txt = txt
        self.left = left
        self.right = right

    def split(self, regex):
        """Split ``txt`` on *regex*, keeping each separator as a ``left`` border.

        Returns a ``BorderedTextList`` of ``BorderedText`` pieces.  The first
        piece inherits this object's ``left``; every later piece gets the
        separator that preceded it as ``left``; the last piece inherits this
        object's ``right``.  Concatenating ``text()`` of the pieces therefore
        yields ``self.text()`` again.  When *regex* does not match, a single
        copy of this object is returned.

        *regex* is a pattern string.  It may contain capturing groups of its
        own: the stride through the ``re.split`` output is derived from the
        compiled pattern's group count instead of being assumed to be 2.
        """
        pattern = re.compile("(" + regex + ")")
        parts = pattern.split(self.txt)
        if len(parts) == 1:
            return BorderedTextList(
                [BorderedText(self.txt, self.left, self.right)])

        # re.split emits, per match, the outer group followed by one entry
        # for every inner group of *regex*, and then the next text chunk.
        stride = pattern.groups + 1
        bodies = parts[0::stride]
        separators = parts[1::stride]

        pieces = [BorderedText(bodies[0], self.left)]
        for separator, body in zip(separators, bodies[1:]):
            pieces.append(BorderedText(body, separator))
        pieces[-1].right = self.right
        return BorderedTextList(pieces)

    def text(self):
        """Return the text including its left and right borders."""
        return self.left + self.txt + self.right

    def __repr__(self):
        def nlrep(txt):
            # Show line breaks as escapes so the repr stays on one line.
            return txt.replace("\n", "\\n").replace("\r", "\\r")

        return '[left="%s" txt="%s" right="%s"]' % (
            nlrep(self.left), nlrep(self.txt), nlrep(self.right))
class BorderedTextSpaned(BorderedText):
    """A ``BorderedText`` that records which cell it belongs to.

    ``spanParent`` is the cell whose content this object stands for: the
    object itself by default, or the cell passed as *spanParent* when this
    object is only a placeholder for another cell.
    """

    def __init__(self, txt, left="", right="", spanParent=None):
        super(BorderedTextSpaned, self).__init__(txt, left, right)
        # Identity test: ``== None`` would go through any custom __eq__.
        self.spanParent = self if spanParent is None else spanParent
class WikiTableEditor(object):
    """Parse wiki table markup into a grid of editable cells.

    After construction ``self.rows`` is a ``BorderedTextList`` of rows, each
    row a ``BorderedTextList`` of ``BorderedTextSpaned`` cells.  The first
    entry of a row carries the text that follows the ``|-`` row separator.
    Cells covered by a ``colspan``/``rowspan`` are represented by empty
    placeholder entries whose ``spanParent`` is the spanning cell, so reads
    and writes through ``editor[row, col]`` reach the real cell.
    ``text()`` re-serialises the (possibly edited) table.
    """

    # Raw strings: "\|" is an invalid escape sequence in a normal literal.
    _ROW_SEPARATOR = r"(?:^|\r\n|\n)\|- *"
    _CELL_SEPARATOR = r"(?:(?:^|\r\n|\n)[|!]| *\|\|) *"
    _ATTRIBUTE_SEPARATOR = r"(\| *)"
    _COLSPAN = r'colspan="?([0-9]+)'
    _ROWSPAN = r'rowspan="?([0-9]+)'

    def __init__(self, txt):
        self.parseTable(txt)

    def parseTable(self, txt):
        """Parse *txt* and store the resulting grid in ``self.rows``."""
        self.rows = (BorderedText(txt)
                     .split(self._ROW_SEPARATOR)
                     .split(self._CELL_SEPARATOR))
        self.rows.lst = [
            self._fillColspans([self._moveDefinitionToLeft(cell)
                                for cell in row])
            for row in self.rows
        ]
        self._fillRowspans()

    def _moveDefinitionToLeft(self, cell):
        """Move a leading ``attributes|`` prefix from ``txt`` into ``left``."""
        parts = re.split(self._ATTRIBUTE_SEPARATOR, cell.txt, maxsplit=1)
        # No "|" at all, or a "[" before the first "|": leave the cell
        # untouched (presumably the "|" then belongs to a piped link such as
        # [[target|label]] -- TODO confirm).
        if len(parts) == 1 or "[" in parts[0]:
            return cell
        return BorderedText(parts[2],
                            cell.left + parts[0] + parts[1],
                            cell.right)

    def _fillColspans(self, inRow):
        """Return *inRow* as spanned cells, padded with colspan placeholders."""
        outRow = []
        for cell in inRow:
            spanned = BorderedTextSpaned(cell.txt, cell.left, cell.right)
            outRow.append(spanned)
            m = re.search(self._COLSPAN, cell.left)
            if m:
                # Placeholders must reference the cell stored in the row
                # (``spanned``), not the discarded input ``cell``; otherwise
                # assignments through a placeholder never reach the table.
                for _ in range(int(m.group(1)) - 1):
                    outRow.append(BorderedTextSpaned("", "", "", spanned))
        return BorderedTextList(outRow)

    def _fillRowspans(self):
        """Insert rowspan placeholders into the rows below a spanning cell."""
        # NOTE(review): a cell carrying both colspan and rowspan only gets a
        # single placeholder per following row -- confirm whether that is
        # intended.
        rows = self.rows.lst
        for rowIdx, row in enumerate(rows):
            # Only rows *below* rowIdx are modified, so iterating row.lst
            # while inserting is safe.
            for colIdx, cell in enumerate(row.lst):
                m = re.search(self._ROWSPAN, cell.left)
                if not m:
                    continue
                span = int(m.group(1))
                if span <= 1:
                    continue
                for k in range(rowIdx + 1, min(rowIdx + span, len(rows))):
                    below = rows[k]
                    placeholder = BorderedTextSpaned("", "", "", cell)
                    below.lst = (below.lst[:colIdx] + [placeholder] +
                                 below.lst[colIdx:])

    def __repr__(self):
        """Render the grid as aligned columns (cell text cut at 40 chars)."""
        widths = []
        for row in self.rows:
            if len(widths) < len(row):
                widths += [2] * (len(row) - len(widths))
            for k, cell in enumerate(row):
                widths[k] = max(widths[k], min(len(cell.txt), 40))
        lines = []
        for row in self.rows:
            cells = [re.sub("[\n\r]", "", cell.txt)[:widths[k]].ljust(widths[k])
                     for k, cell in enumerate(row)]
            lines.append("I ".join(cells))
        return "\n".join(lines)

    def __getitem__(self, key):
        """Return a row for ``editor[row]`` or cell text for ``editor[row, col]``.

        For a cell, the text of its ``spanParent`` is returned, so span
        placeholders yield the text of the cell that covers them.
        """
        entry = self.rows[key]
        if isinstance(entry, BorderedText):
            return entry.spanParent.txt
        return entry

    def __setitem__(self, key, txt):
        """Set the text of the cell at ``key`` (written to its ``spanParent``)."""
        self.rows[key].spanParent.txt = txt

    def text(self):
        """Return the table markup rebuilt from the current cell contents."""
        return self.rows.text()

    def __len__(self):
        return len(self.rows)

    def insertRow(self, before, numCols=-1):
        """Insert an empty row in front of row index *before*.

        *numCols* is the number of entries in the new row, counting the
        leading ``|-`` entry; ``-1`` (default) uses the length of the longest
        existing row.
        """
        if numCols == -1:
            numCols = max([len(row) for row in self.rows])
        newRow = [BorderedTextSpaned("", "\n|-")]
        newRow.extend(BorderedTextSpaned("", "\n| ")
                      for _ in range(1, numCols))
        self.rows = BorderedTextList(self.rows.lst[0:before] +
                                     [BorderedTextList(newRow)] +
                                     self.rows.lst[before:])
if __name__ == "__main__":
    # Command-line use: dump every table found in a file and, optionally,
    # the cells addressed by ROW/COL arguments.
    import sys

    if len(sys.argv) < 2:
        sys.exit("usage: %s FILE [ROW/COL ...]" % sys.argv[0])

    # Close the file deterministically instead of leaking the handle.
    with open(sys.argv[1], "rb") as f:
        txt = f.read().decode("iso-8859-1")

    def printable(s):
        # Keep borders on one output line by escaping line breaks.
        return s.replace("\n", "\\n").replace("\r", "\\r")

    # Raw string: "\{" and "\|" are invalid escapes in a normal literal.
    for m in re.findall(r"\{\|(.*?)\|\}", txt, re.DOTALL):
        e = WikiTableEditor(m)
        print(e)
        for i in sys.argv[2:]:
            row, col = i.split("/")
            row = int(row)
            col = int(col)
            cell = e.rows[row, col]
            print("row=%d col=%d val='%s' left='%s' right='%s'" % (
                row, col, e[row, col],
                printable(cell.left), printable(cell.right)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment