Skip to content

Instantly share code, notes, and snippets.

@SergProduction
Created August 18, 2017 23:42
Show Gist options
  • Select an option

  • Save SergProduction/ab159b13598cdf84d9a0dc5bef2becb9 to your computer and use it in GitHub Desktop.

Select an option

Save SergProduction/ab159b13598cdf84d9a0dc5bef2becb9 to your computer and use it in GitHub Desktop.
def file(name, flag=False):
    """Open *name* and return either its lines or the open file object.

    Args:
        name: Path of the file to open (text mode, default encoding).
        flag: When equal to ``'w'`` the open file object itself is returned
            and the caller is responsible for closing it. Any other value
            (including the default) returns the file's contents instead.

    Returns:
        A list of the file's lines with line endings preserved, or the open
        file object when ``flag == 'w'``.
    """
    if flag == 'w':
        # NOTE(review): the handle is opened in the default read mode even
        # though the flag is spelled 'w'. Kept as-is because switching to
        # write mode would truncate existing files -- confirm the intent.
        return open(name)
    # Read inside a context manager so the handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(name) as f:
        return f.readlines()
def readfile(filename):
    """Parse a tab-separated matrix file into row names, column names and data.

    The first line holds the column titles; its first cell is dropped. Every
    following non-blank line holds a row name in its first cell followed by
    numeric cells. The number of lines read is printed to stdout.

    Args:
        filename: Path of the tab-separated file, read through ``file()``.

    Returns:
        A tuple ``(rownames, colnames, data)`` where ``data`` is a list of
        lists of floats, one inner list per row name.

    Raises:
        IndexError: If the file contains no lines at all.
        ValueError: If a data cell cannot be converted to ``float``.
    """
    lines = list(file(filename))
    print(len(lines))
    # First line is the column titles
    colnames = lines[0].strip().split('\t')[1:]
    rownames = []
    data = []
    for line in lines[1:]:
        stripped = line.strip()
        if not stripped:
            # A blank line would otherwise become a row named '' with an
            # empty vector, which breaks any later per-row computation.
            continue
        cells = stripped.split('\t')
        # First column in each row is the rowname
        rownames.append(cells[0])
        # The data for this row is the remainder of the row
        data.append([float(x) for x in cells[1:]])
    return (rownames, colnames, data)
class bicluster:
    """One node of a cluster tree.

    Stores the node's vector, its optional left/right child nodes, the
    distance value supplied at construction and an identifier.
    """

    def __init__(self, vec, left=None, right=None, distance=0.0, id=None):
        # Payload first, then the tree links.
        self.vec = vec
        self.id = id
        self.distance = distance
        self.left, self.right = left, right
def hcluster(rows, distance=pearson):
    """Cluster *rows* bottom-up and return the root ``bicluster``.

    Every row starts out as its own cluster whose id is its index in
    *rows*. While more than one cluster remains, the pair with the smallest
    *distance* between their vectors is replaced by a new cluster holding
    the element-wise average of the two vectors. Merged clusters receive
    negative ids (-1, -2, ...).

    Args:
        rows: Indexable collection of numeric vectors.
        distance: Callable taking two vectors and returning a comparable
            distance value.

    Returns:
        The single remaining ``bicluster``.
    """
    # Distances already measured, keyed by the (id, id) pair of the clusters.
    cache = {}
    next_id = -1

    # Clusters are initially just the rows.
    clust = [bicluster(rows[idx], id=idx) for idx in range(len(rows))]

    while len(clust) > 1:
        best = (0, 1)
        best_dist = distance(clust[0].vec, clust[1].vec)

        # Scan every unordered pair for the smallest distance.
        for a, first in enumerate(clust):
            for b in range(a + 1, len(clust)):
                second = clust[b]
                key = (first.id, second.id)
                if key not in cache:
                    cache[key] = distance(first.vec, second.vec)
                candidate = cache[key]
                # Strict comparison: the earliest pair wins on ties.
                if candidate < best_dist:
                    best_dist = candidate
                    best = (a, b)

        lo, hi = best
        left_node = clust[lo]
        right_node = clust[hi]

        # Average the two vectors component by component; the width is
        # taken from the first cluster in the list.
        width = len(clust[0].vec)
        merged = [
            (left_node.vec[k] + right_node.vec[k]) / 2.0
            for k in range(width)
        ]

        parent = bicluster(
            merged,
            left=left_node,
            right=right_node,
            distance=best_dist,
            id=next_id,
        )
        # Cluster ids that weren't in the original set are negative.
        next_id -= 1

        # Remove the higher index first so the lower one stays valid.
        del clust[hi]
        del clust[lo]
        clust.append(parent)

    return clust[0]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment