Created
June 11, 2011 20:53
-
-
Save thouis/1020945 to your computer and use it in GitHub Desktop.
Median of medians per gene, sorted by gene
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys
from collections import defaultdict

import numpy as np
import xlrd
# Worksheet and column names this script expects in the input workbook.
SHEET_NAME = 'Normalization 1'
GENE_COLUMN = 'Gene'
REPLICATE_COLUMNS = ['rep1', 'rep2', 'rep3']

REPORT_HEADER = "Gene,\tMedian,\t2nd highest,\t2nd lowest,\tNumber"
REPORT_ROW = "%s,\t%f,\t%f,\t%f,\t%d"


def read_gene_medians(sheet):
    """Group the per-row replicate medians of *sheet* by gene.

    The first row of the sheet is treated as the header row; the 'Gene',
    'rep1', 'rep2' and 'rep3' columns are located by name.  For every
    following row the median of the three replicate cells is appended to
    the list kept for that row's gene.

    Args:
        sheet: worksheet object exposing ``row(i)`` (a sequence of cells
            with a ``.value`` attribute) and ``nrows``.

    Returns:
        Mapping of gene -> list of row medians, in row order.

    Raises:
        ValueError: if one of the expected column names is not in the header.
    """
    headers = [cell.value for cell in sheet.row(0)]
    gene_col = headers.index(GENE_COLUMN)
    rep_cols = [headers.index(name) for name in REPLICATE_COLUMNS]

    medians_by_gene = defaultdict(list)
    for rowidx in range(1, sheet.nrows):
        row = sheet.row(rowidx)
        replicates = [row[col].value for col in rep_cols]
        # append() instead of rebuilding the list for every row.
        medians_by_gene[row[gene_col].value].append(np.median(replicates))
    return medians_by_gene


def gene_report_lines(data):
    """Format one report line per gene and return the lines sorted.

    Each line holds the gene, the median of its row medians, the second
    highest and second lowest row median, and the number of rows seen for
    the gene.  A gene with fewer than two rows has no second highest or
    second lowest value, so ``nan`` is reported for both instead of
    raising ``IndexError``.

    Args:
        data: mapping of gene -> list of row medians.

    Returns:
        List of formatted lines, sorted as strings (which orders them by
        their leading gene field).
    """
    lines = []
    for gene, medians in data.items():
        ordered = sorted(medians)
        if len(ordered) >= 2:
            second_highest, second_lowest = ordered[-2], ordered[1]
        else:
            second_highest = second_lowest = float('nan')
        lines.append(REPORT_ROW % (gene, np.median(ordered), second_highest,
                                   second_lowest, len(ordered)))
    lines.sort()
    return lines


def main(argv=None):
    """Read the workbook named on the command line and print the report.

    Args:
        argv: argument vector; defaults to ``sys.argv``.  ``argv[1]`` is the
            path of the workbook to open.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        prog = argv[0] if argv else 'script'
        sys.exit("usage: %s WORKBOOK" % prog)

    book = xlrd.open_workbook(argv[1])
    sheet = book.sheet_by_name(SHEET_NAME)

    # Single-argument print() calls behave identically on Python 2 and 3.
    print(REPORT_HEADER)
    print("\n".join(gene_report_lines(read_gene_medians(sheet))))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment