Created
April 4, 2017 13:55
-
-
Save krassowski/af52a96365b491bea66256ae44a05636 to your computer and use it in GitHub Desktop.
BCD implementation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
A partial, lazy implementation of BCD (http://genomebiology.biomedcentral.com/articles/10.1186/gb-2007-8-12-r271) | |
BiBS, Systems Biology workshops 2017 | |
""" | |
from math import sqrt | |
from networkx import Graph, number_of_nodes, connected_component_subgraphs | |
from networkx.algorithms import edge_betweenness_centrality | |
def test_graph(): | |
g = Graph() | |
for x in range(6): | |
g.add_node(x + 1) | |
g.add_edge(1, 2) | |
g.add_edge(3, 2) | |
g.add_edge(3, 1) | |
g.add_edge(3, 4) | |
g.add_edge(5, 4) | |
g.add_edge(6, 4) | |
return g | |
def networkx_from_sif(path, mode='r', delimeter=None): | |
"""Return NetworkX created from given SIF file""" | |
g = Graph() | |
with open(path, mode) as f: | |
for line in f: | |
try: | |
node_from, relation, *nodes_to = line.strip().split(delimeter) | |
g.add_node(node_from) | |
g.add_nodes_from(nodes_to) | |
for node_to in nodes_to: | |
g.add_edge(node_from, node_to, relation=relation) | |
except ValueError: | |
node_from = line.strip() | |
g.add_node(node_from) | |
return g | |
class Node: | |
def __init__(self, v=None, parent=None): | |
self.value = v | |
self.children = [] | |
self.parent = parent | |
self.is_cluster = False | |
@property | |
def is_special_parent(self): | |
return not self.value and any([c.value for c in self.children]) | |
def maximal_commonality(self): | |
pass | |
class BCD: | |
def __init__(self, graph): | |
self.graph = graph | |
self.N = number_of_nodes(graph) | |
self.commonalities = { | |
edge: self.commonality(edge) | |
for edge in self.graph.edges() | |
} | |
def commonality(self, edge): | |
a = edge[0] | |
b = edge[1] | |
n = self.graph.degree(a) | |
m = self.graph.degree(b) | |
k = ( | |
set(self.graph.neighbors(a)) | |
.intersection(set(self.graph.neighbors(b))) | |
.difference({a, b}) | |
) | |
k = n + m / self.N | |
return k + 1 / sqrt(n * m) | |
def calc(self): | |
if number_of_nodes(self.graph) == 1: | |
return Node(self.graph, parent=self.graph) | |
else: | |
parent = Node(None, parent=self.graph) | |
self.remove() | |
sub_graphs = connected_component_subgraphs(self.graph) | |
for graph in sub_graphs: | |
bcd = BCD(graph) | |
subtree = bcd.calc() | |
parent.children.append(subtree) | |
return parent | |
def remove(self): | |
edges_betweenness = edge_betweenness_centrality(self.graph) | |
fractions = { | |
edge: edges_betweenness[edge] / self.commonalities[edge] | |
for edge in self.graph.edges() | |
} | |
to_be_removed = max( | |
fractions.keys(), | |
key=(lambda key: fractions[key]) | |
) | |
self.graph.remove_edge(*to_be_removed) | |
def mark_clusters(node): | |
if node.is_special_parent: | |
node.is_cluster = True | |
else: | |
for child in node.children: | |
mark_clusters(child) | |
def merge_clusters(node): | |
# TO BE DONE | |
return node | |
def filter_out_false_positives(graph): | |
# TO BE DONE | |
return graph | |
def cluster_with_bcd(graph): | |
graph = filter_out_false_positives(graph) | |
bcd = BCD(graph) | |
tree = bcd.calc() # tree is the root | |
mark_clusters(tree) | |
return tree | |
# graph = read_gexf('yeast.gexf', relabel=True) | |
# graph = networkx_from_sif('yeast.gexf') | |
graph = test_graph() | |
clustered = cluster_with_bcd(graph) | |
# from IPython import embed | |
# embed() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment