Last active
September 12, 2016 20:21
-
-
Save zouzias/8c0291aff57f08d23588 to your computer and use it in GitHub Desktop.
Convert CSV to JSON with two nested categories
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import json | |
# Input CSV, one gene per row. newline='' leaves newline handling to the csv
# module, which is what the csv documentation recommends for reader inputs.
csvfile = open('voronoid-graph.csv', 'r', newline='')
# Destination for the nested JSON document.
jsonfile = open('output.json', 'w')

# Column names assigned to each CSV row, in file order.
fieldnames = ("gene", "subcategory", "category")

# Grouping index filled while reading: category -> subcategory -> [gene, ...].
categories = {}
def labelToJSON(label):
    """Return a leaf node for *label* with a fixed weight of 1.

    The result is a plain dict with the keys ``"weight"`` and ``"label"``.
    """
    return {"weight": 1, "label": label}
def labelsToGroup(labels):
    """Return a list with one ``labelToJSON`` node per entry of *labels*.

    Order is preserved; an empty input yields an empty list.
    """
    return [labelToJSON(label) for label in labels]
def arrayToJSONArray(array):
    """Join the strings in *array* with commas and wrap them in brackets.

    Items are inserted verbatim (no quoting or escaping), so the result is
    valid JSON only if every item is already a serialized JSON value.
    Every item must be a ``str``; ``str.join`` raises ``TypeError`` otherwise.
    """
    return '[' + ','.join(array) + ']'
# Read the CSV and index genes as category -> subcategory -> [gene, ...].
# Because fieldnames is passed explicitly, DictReader treats the first line of
# the file as data rather than as a header row.
reader = csv.DictReader(csvfile, fieldnames)
with csvfile:  # close the input as soon as every row has been consumed
    for row in reader:
        category = row['category']
        subCategory = row['subcategory']
        gene = row['gene']
        # Create the nested containers on first sight of a (sub)category.
        categories.setdefault(category, {}).setdefault(subCategory, []).append(gene)

# Print every category with a running index.
print("**************")
print("All categories")
print("**************")
for total, category in enumerate(categories):
    print('[' + str(total) + ']' + category)

# Print every subcategory; the index keeps counting across categories.
total = 0
print("******************")
print("All subcategories")
print("******************")
for category in categories:
    for subcategory in categories[category]:
        print('[' + str(total) + ']' + subcategory)
        total += 1

# Build the nested output: one group per category, one subgroup per
# subcategory, one leaf per gene. A node's weight is its number of genes.
groups = []
for category, subcategories in categories.items():
    subgroups = []
    # Despite the name, this accumulates the gene count of the whole category.
    genesPerSubCategory = 0
    for subcategory, genes in subcategories.items():
        print('\tsubcategory : ' + subcategory + '(' + str(len(genes)) + ')')
        subgroup = {
            "label": subcategory,
            "weight": len(genes),
            "groups": labelsToGroup(genes),
        }
        genesPerSubCategory += len(genes)
        subgroups.append(subgroup)

    print('category : ' + category + '(' + str(genesPerSubCategory) + ')')
    groups.append({
        "label": category,
        "weight": genesPerSubCategory,
        "groups": subgroups,
    })

jsonOutput = {"groups": groups}
with jsonfile:  # flush and close the output once the document is written
    json.dump(jsonOutput, jsonfile)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import json | |
# Input CSV, one weighted gene per row. The former 'rU' mode is rejected by
# Python 3.11+ (the 'U' flag was removed); newline='' instead lets the csv
# module handle every newline convention itself.
csvfile = open('test_voronoi.csv', 'r', newline='')
# Destination for the nested JSON document.
jsonfile = open('output.json', 'w')

# Column names assigned to each CSV row, in file order.
fieldnames = ("gene", "subcategory", "category", "weight")

# Grouping index filled while reading:
# category -> subcategory -> [{'name': ..., 'weight': ...}, ...].
categories = {}
def weightSum(genes):
    """Return the total of the ``'weight'`` entries of *genes* as a float.

    *genes* is an iterable of mappings that each have a ``'weight'`` key.
    An empty iterable yields ``0.0``.
    """
    # Start from 0.0 so the result is a float even for integer weights.
    return sum((gene['weight'] for gene in genes), 0.0)
def labelToJSON(label):
    """Return a leaf node built from a gene record.

    *label* is a mapping with the keys ``'name'`` and ``'weight'``; the
    result maps ``"label"`` to the name and ``"weight"`` to the weight.
    """
    return {"weight": label['weight'], "label": label['name']}
def labelsToGroup(labels):
    """Return a list with one ``labelToJSON`` node per entry of *labels*.

    Order is preserved; an empty input yields an empty list.
    """
    return [labelToJSON(label) for label in labels]
def arrayToJSONArray(array):
    """Join the strings in *array* with commas and wrap them in brackets.

    Items are inserted verbatim (no quoting or escaping), so the result is
    valid JSON only if every item is already a serialized JSON value.
    Every item must be a ``str``; ``str.join`` raises ``TypeError`` otherwise.
    """
    return '[' + ','.join(array) + ']'
# Read the CSV and index gene records as
# category -> subcategory -> [{'name': ..., 'weight': ...}, ...].
# Because fieldnames is passed explicitly, DictReader treats the first line of
# the file as data rather than as a header row.
reader = csv.DictReader(csvfile, fieldnames)
with csvfile:  # close the input as soon as every row has been consumed
    for row in reader:
        category = row['category']
        subCategory = row['subcategory']
        print(row['weight'])
        # float() raises ValueError on a non-numeric weight column.
        gene = {'name': row['gene'], 'weight': float(row['weight'])}
        # Create the nested containers on first sight of a (sub)category.
        categories.setdefault(category, {}).setdefault(subCategory, []).append(gene)

# Print every category with a running index.
print("**************")
print("All categories")
print("**************")
for total, category in enumerate(categories):
    print('[' + str(total) + ']' + category)

# Print every subcategory; the index keeps counting across categories.
total = 0
print("******************")
print("All subcategories")
print("******************")
for category in categories:
    for subcategory in categories[category]:
        print('[' + str(total) + ']' + subcategory)
        total += 1

# Build the nested output: one group per category, one subgroup per
# subcategory, one leaf per gene. A node's weight is the sum of the gene
# weights beneath it.
groups = []
for category, subcategories in categories.items():
    subgroups = []
    # Despite the name, this accumulates the weight of the whole category.
    genesPerSubCategory = 0.0
    for subcategory, genes in subcategories.items():
        print('\tsubcategory : ' + subcategory + '(' + str(len(genes)) + ')')
        # Compute the subcategory weight once and reuse it for both totals.
        subcategoryWeight = weightSum(genes)
        subgroup = {
            "label": subcategory,
            "weight": subcategoryWeight,
            "groups": labelsToGroup(genes),
        }
        genesPerSubCategory += subcategoryWeight
        subgroups.append(subgroup)

    print('category : ' + category + '(' + str(genesPerSubCategory) + ')')
    groups.append({
        "label": category,
        "weight": genesPerSubCategory,
        "groups": subgroups,
    })

jsonOutput = {"groups": groups}
with jsonfile:  # flush and close the output once the document is written
    json.dump(jsonOutput, jsonfile)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment