Created
October 19, 2010 22:51
-
-
Save jl2/635319 to your computer and use it in GitHub Desktop.
Merge two or more wiki tables and merge duplicate entries based on 'key' columns in each table
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
# Script to merge two or more wiki tables and merge duplicate entries based on 'key' columns in each table
# How to use:
#     wikimerge.py keys columns files...
# 'keys' is a comma-separated list of key columns, one for each table.
#     This is the column from each table that will be joined on.
#     Do not put spaces in the list. Use '1,2,3', not '1, 2, 3'.
# 'columns' is a semicolon-separated list of table,column pairs specifying the output file columns.
#     Don't put spaces in this list either. Use '1,2;0,1;2,4', not '1,2; 0,1; 2,4'.
# 'files' are two or more input files in "wiki table format". Just copy/paste all the lines from a wiki between {| and |}.
import sys | |
def _parse_out_column(spec):
    """Convert one 'table,column' pair such as '1,2' into an (int, int) tuple."""
    table, column = spec.split(',')
    return (int(table), int(column))


def _store_row(rows_by_key, row, key_col):
    """File a non-empty row under the value found in its key column."""
    if row:
        rows_by_key.setdefault(row[key_col], []).append(row)


def _parse_table(lines, key_col):
    """Parse the lines of one wiki table.

    Each markup line is expected to carry a two-character prefix ('! ' for a
    header, '| ' for a cell) followed by the text; the text after that prefix
    is stored verbatim.

    Args:
        lines: iterable of text lines (for example an open file).
        key_col: index of the cell whose value identifies a row.

    Returns:
        A (headers, rows_by_key) pair.  `headers` is the list of header
        texts in order of appearance.  `rows_by_key` maps each key value to
        the list of rows sharing it; a row is a dict of column index -> text.

    Raises:
        KeyError: if a row has no cell at index `key_col`.
    """
    headers = []
    rows_by_key = {}
    row = {}
    col = 0
    for raw in lines:
        # Strip only the line terminator so a final line without a newline
        # does not lose its last character.
        line = raw.rstrip('\n')
        if line.startswith('{|'):
            continue
        if line.startswith(('|}', '|-')):
            # A row separator or the end of the table both complete the
            # row collected so far.
            _store_row(rows_by_key, row, key_col)
            row = {}
            col = 0
            continue
        if line.startswith('!'):
            headers.append(line[2:])
            # Header cells occupy a column position too, so cells that
            # follow one in the same row are numbered after it.
            col += 1
            continue
        if line.startswith('|'):
            row[col] = line[2:]
            col += 1
    # The input may stop right after the last cell (no '|-' or '|}'), so
    # make sure the final row is not lost.
    _store_row(rows_by_key, row, key_col)
    return headers, rows_by_key


def main(args):
    """Merge wiki tables on their key columns and print the merged table.

    Args:
        args: command-line style argument list:
            args[0]  comma-separated key column index, one per input file;
            args[1]  semicolon-separated 'table,column' pairs selecting the
                     output columns;
            args[2:] paths of the input files in wiki table format.

    The merged table is written to standard output.  One output row is
    produced for every key present in the first table; for each selected
    column the values of all rows with that key in the referenced table
    are joined with ','.

    Raises:
        IndexError: if there are fewer keys than input files, or an output
            column refers to a table or header that does not exist.
        KeyError: if a row lacks its key cell or a selected column.
        ValueError: if the key or column specification is not numeric.
    """
    keys = [int(k) for k in args[0].split(',')]
    outCols = [_parse_out_column(spec) for spec in args[1].split(';')]

    tableColumns = []
    tables = []
    for i, fn in enumerate(args[2:]):
        # Parse each file with its own state so an unfinished row can never
        # leak into the next table, and close the file even on errors.
        with open(fn, "rt") as inf:
            headers, rows_by_key = _parse_table(inf, keys[i])
        tableColumns.append(headers)
        tables.append(rows_by_key)

    print('{| class="wikitable sortable"')
    for tab, col in outCols:
        print('! {}'.format(tableColumns[tab][col]))
    print('|-')
    for k in tables[0]:
        for tab, col in outCols:
            vals = [v[col] for v in tables[tab].get(k, [])]
            print('| {}'.format(','.join(vals)))
        print('|-')
    print('|}')
if __name__ == '__main__':
    # Run as a script: hand main() everything after the program name.
    cli_args = sys.argv[1:]
    main(cli_args)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment