-
-
Save SergProduction/ab159b13598cdf84d9a0dc5bef2becb9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def file(name, flag=False):
    """Minimal stand-in for the Python 2 ``file`` builtin.

    Args:
        name: Path of the file to open. It is opened with ``open``'s
            default mode (text, read-only).
        flag: When equal to ``'w'`` the open file object itself is
            returned and the caller is responsible for closing it. Any
            other value (the default) returns the file's lines instead.

    Returns:
        The open file object when ``flag == 'w'``; otherwise a list of the
        file's lines, line endings included.
    """
    if flag == 'w':
        # NOTE(review): the handle is opened in read mode even though the
        # flag is 'w'. Kept unchanged because switching to write mode would
        # truncate the file for existing callers -- confirm the intent.
        return open(name)
    # The context manager closes the handle as soon as the lines are read,
    # instead of leaving it open until garbage collection.
    with open(name) as f:
        return f.readlines()
def readfile(filename):
    """Parse a tab-separated table into row names, column names and data.

    The first line supplies the column titles (its first field is skipped).
    On every following line the first field is the row name and the
    remaining fields are converted to ``float``. The number of lines read
    is printed to standard output.

    Args:
        filename: Path of the tab-separated file, passed to ``file``.

    Returns:
        A ``(rownames, colnames, data)`` tuple where ``data[i]`` is the list
        of floats belonging to ``rownames[i]``. Three empty lists are
        returned when the file contains no lines at all.

    Raises:
        ValueError: If a data field cannot be converted to ``float``.
    """
    lines = list(file(filename))
    print(len(lines))
    if not lines:
        # Nothing to parse; avoid indexing the missing header line.
        return ([], [], [])
    # First line is the column titles
    colnames = lines[0].strip().split('\t')[1:]
    rownames = []
    data = []
    for line in lines[1:]:
        stripped = line.strip()
        if not stripped:
            # A blank line would otherwise become a row named '' with no data.
            continue
        fields = stripped.split('\t')
        # First column in each row is the rowname
        rownames.append(fields[0])
        # The data for this row is the remainder of the row
        data.append([float(x) for x in fields[1:]])
    return (rownames, colnames, data)
class bicluster:
    """Node of a binary cluster tree.

    Attributes are plain public fields:
        vec: The vector stored at this node.
        left, right: Child nodes, or ``None`` when not supplied.
        distance: Distance value recorded for this node (default ``0.0``).
        id: Identifier of the node (default ``None``).
    """

    def __init__(self, vec, left=None, right=None, distance=0.0, id=None):
        # Children first, then payload and identity, then the distance.
        self.left, self.right = left, right
        self.vec, self.id = vec, id
        self.distance = distance
def hcluster(rows, distance=pearson):
    """Build a cluster tree by repeatedly merging the two closest clusters.

    Every row starts as its own ``bicluster`` whose id is the row's index.
    On each pass the pair with the smallest ``distance`` is replaced by a
    new ``bicluster`` whose vector is the element-wise average of the two,
    whose children are the merged pair and whose id is negative.

    Args:
        rows: The vectors to cluster. Rows are assumed to have equal
            length; the merged vector is built by pairing elements.
        distance: Callable taking two vectors and returning a comparable
            value; smaller values are treated as closer.

    Returns:
        The single ``bicluster`` left once everything has been merged.

    Raises:
        IndexError: If ``rows`` is empty.
    """
    # Cache of distance calculations, keyed by the pair of cluster ids.
    distances = {}

    def cached_distance(a, b):
        # Compute each id pair at most once, including the seed pair below.
        key = (a.id, b.id)
        if key not in distances:
            distances[key] = distance(a.vec, b.vec)
        return distances[key]

    currentclustid = -1
    # Clusters are initially just the rows
    clust = [bicluster(row, id=i) for i, row in enumerate(rows)]
    while len(clust) > 1:
        lowestpair = (0, 1)
        closest = cached_distance(clust[0], clust[1])
        # Loop through every pair looking for the smallest distance; on a
        # tie the earliest pair in (i, j) order is kept.
        for i, first in enumerate(clust):
            for j in range(i + 1, len(clust)):
                d = cached_distance(first, clust[j])
                if d < closest:
                    closest = d
                    lowestpair = (i, j)
        left = clust[lowestpair[0]]
        right = clust[lowestpair[1]]
        # Calculate the average of the two clusters
        mergevec = [(a + b) / 2.0 for a, b in zip(left.vec, right.vec)]
        # Create the new cluster
        newcluster = bicluster(mergevec, left=left, right=right,
                               distance=closest, id=currentclustid)
        # Cluster ids that weren't in the original set are negative
        currentclustid -= 1
        # Delete the higher index first so the lower index stays valid.
        del clust[lowestpair[1]]
        del clust[lowestpair[0]]
        clust.append(newcluster)
    return clust[0]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment