Created
June 30, 2011 07:24
-
-
Save bengolder/1055797 to your computer and use it in GitHub Desktop.
parsing Gernot's Construction Data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import xlrd | |
''' | |
Look at the data format for d3.js here: | |
https://gist.github.com/1049893 | |
''' | |
class Node(object):
    """A named graph node that remembers every link attached to it."""

    def __init__(self, name, index):
        # Identity of the node: display name plus its numeric index.
        self.name, self.index = name, index
        # Link objects touching this node; starts empty and is filled
        # by whoever creates the links.
        self.links = list()
class Link(object):
    """A connection between two nodes carrying a weight and an optional date."""

    def __init__(self, node1, node2):
        # The two endpoints, stored exactly as given.
        self.node1, self.node2 = node1, node2
        # Defaults; callers overwrite these after construction.
        self.weight, self.date = 0, None
def forceInt(thing):
    """Convert a cell value to an int, treating blank values as 0.

    ``None``, the empty string and whitespace-only strings all count as
    "no value" and yield 0.  Anything else is passed to ``int()``, so a
    non-numeric string still raises ``ValueError``.
    """
    if thing is None:
        return 0
    # Duck-typed string check (works for str/unicode alike): a cell that
    # contains only spaces is as empty as '' for our purposes.
    if hasattr(thing, 'strip') and not thing.strip():
        return 0
    return int(thing)
def getSheetData(sheet):
    """Read node and link data from one worksheet.

    Layout read by this function: row 0 is skipped as a header; column 0
    holds node names, column 1 holds 1-based node numbers, and the cells
    from column 2 onwards on each data row hold link weights.

    Returns a tuple ``(nameIndexPairs, links)`` where ``nameIndexPairs``
    is a list of ``(zeroBasedIndex, name)`` tuples and ``links`` is a
    list of ``[row, column, weight]`` lists.  Only cells whose column
    position is greater than the row position are kept, so each pair is
    recorded once.
    """
    names = [c.value for c in sheet.col_slice(0)[1:]]
    indices = [int(c.value) - 1 for c in sheet.col_slice(1)[1:]]
    # Materialise the pairs: on Python 3 zip() is a one-shot iterator, and
    # the caller iterates the returned value again to build the nodes.
    nameIndexPairs = list(zip(indices, names))
    links = []
    # NOTE(review): links are addressed by row/column position, not by the
    # value in the index column -- this presumably relies on the sheet
    # numbering its rows 1..N in order; confirm against the data.
    for i in range(len(nameIndexPairs)):
        linkData = [forceInt(c.value) for c in sheet.row_slice(i + 1)[2:]]
        links.extend([i, n, val] for n, val in enumerate(linkData) if n > i)
    return nameIndexPairs, links
def makeNodes(nodeData):
    """Build an ``{index: Node}`` lookup from ``(index, name)`` records.

    Runs of whitespace inside each name are collapsed to single spaces
    and leading/trailing whitespace is dropped.  If an index occurs more
    than once, the last record wins.
    """
    return dict(
        (record[0], Node(' '.join(record[1].split()), record[0]))
        for record in nodeData
    )
def addLinks(nodeLib, linkData, date=None):
    """Create Link objects from raw records and attach them to their nodes.

    Each record in ``linkData`` is indexed as ``[key1, key2, weight]``,
    where the keys are looked up in ``nodeLib``.  Every new link is
    appended to the ``links`` list of both endpoint nodes.  ``date`` is
    stored on the link only when it is truthy.

    Returns ``(nodeLib, created)``: the same node lookup that was passed
    in, and the list of links created by this call.
    """
    created = []
    for record in linkData:
        source = nodeLib[record[0]]
        target = nodeLib[record[1]]
        strength = record[2]

        edge = Link(source, target)
        edge.weight = strength
        if date:
            edge.date = date

        # Register the link on both endpoints, then in the result list.
        source.links.append(edge)
        target.links.append(edge)
        created.append(edge)
    return nodeLib, created
def linksText(links, date):
    """Render links as one JSON object member: ``"<date>":[ ... ]``.

    Only links with a weight greater than 0 are written.  Each one
    becomes ``{"source":<node1.index>,"target":<node2.index>,
    "value":<weight>}`` on its own line.

    The key is produced with ``json.dumps`` so that a ``date`` containing
    quotes, backslashes or non-ASCII characters still yields valid JSON;
    for plain names the output is unchanged.
    """
    # Function-scope import keeps this block independent of file-level imports.
    import json

    linkMask = ' {"source":%s,"target":%s,"value":%s}'  # node1.index, node2.index, weight
    rows = [
        linkMask % (k.node1.index, k.node2.index, k.weight)
        for k in links
        if k.weight > 0
    ]
    # '%s' % (date,) keeps the old "format anything as text" behaviour
    # before the text is escaped as a JSON string.
    key = json.dumps('%s' % (date,))
    return '%s:[\n%s\n]' % (key, ',\n'.join(rows))
if __name__ == '__main__':
    # Script entry point: read every sheet of 'data-sets.xls' from the
    # current directory and write a single JSON object to 'data.json'.
    # The object has a "nodes" array (built from the first sheet) plus one
    # array of links per sheet, keyed by the sheet name.
    import json  # local import: used only to escape node names below

    fileName = 'data-sets.xls'
    path = os.path.join(os.getcwd(), fileName)
    book = xlrd.open_workbook(path)

    # The first sheet defines the node set; later sheets only add links.
    firstSheet = book.sheet_by_index(0)
    nodes, linkData = getSheetData(firstSheet)
    nodeLib = makeNodes(nodes)
    nodeLib, links = addLinks(nodeLib, linkData, firstSheet.name)

    nodesMask = '"nodes":[\n%s\n]'  # nodes joined with commas
    # The name is inserted already JSON-quoted (json.dumps), so a name
    # containing a quote or backslash cannot break the output.
    nodeMask = ' {"name":%s,"index":%s}'  # quoted name, then index

    linkSets = [linksText(links, firstSheet.name)]
    for sheetIndex in range(1, book.nsheets):
        sheet = book.sheet_by_index(sheetIndex)
        newLinkData = getSheetData(sheet)[1]
        nodeLib, newLinks = addLinks(nodeLib, newLinkData, sheet.name)
        linkSets.append(linksText(newLinks, sheet.name))

    # Emit nodes in ascending index order instead of dict iteration order:
    # the links refer to nodes by index, so the array order should be
    # deterministic and follow those indices.
    nodeObjs = nodesMask % ',\n'.join(
        nodeMask % (json.dumps(nodeLib[n].name), n) for n in sorted(nodeLib)
    )

    # 'with' guarantees the file is closed even if the write fails.
    with open('data.json', 'w') as f:
        f.write('{%s,\n%s}' % (nodeObjs, ',\n'.join(linkSets)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment