Skip to content

Instantly share code, notes, and snippets.

@MLWhiz
Created January 20, 2019 18:07
Show Gist options
  • Save MLWhiz/998b8420a31bb00ec52185f36cc131e8 to your computer and use it in GitHub Desktop.
Save MLWhiz/998b8420a31bb00ec52185f36cc131e8 to your computer and use it in GitHub Desktop.
from graphframes import *
def vertices(line):
    """Parse one adjacency-list line into its integer vertex ids.

    Args:
        line: Whitespace-separated integer ids, e.g. ``"1 2 3"``.

    Returns:
        The ids as a list of ints, in the order they appear on the line.
        A blank line yields an empty list.

    Raises:
        ValueError: If a token is not a valid integer literal.
    """
    # split() with no argument collapses runs of whitespace and ignores
    # leading/trailing whitespace (including a trailing newline), so stray
    # spaces no longer produce empty tokens that int() would reject.
    return [int(token) for token in line.split()]
# Build the vertex DataFrame (single "id" column) directly from the RDD
# instead of collect()-ing every distinct id to the driver and shipping the
# list back out. Each id is wrapped in a 1-tuple so it forms a one-column row.
# NOTE: this rebinds `vertices` from the parser function to the DataFrame;
# the function is looked up on the right-hand side before the rebinding.
vertices = sqlContext.createDataFrame(
    adjacency_list.flatMap(vertices).distinct().map(lambda vertex_id: (vertex_id,)),
    ["id"],
)
def create_edges(line):
    """Expand one adjacency-list line into directed edges between its ids.

    Every unordered pair of ids on the line is emitted in both directions,
    so the line is treated as a fully connected group. A line holding a
    single id yields one self-loop so the vertex still appears in the edge
    set.

    Args:
        line: Whitespace-separated integer ids, e.g. ``"1 2 3"``.

    Returns:
        A list of ``(src, dst)`` int tuples. A blank line yields an empty
        list.

    Raises:
        ValueError: If a token is not a valid integer literal.
    """
    # split() with no argument tolerates repeated/trailing whitespace, which
    # split(" ") would turn into empty tokens that int() rejects.
    nodes = [int(token) for token in line.split()]

    if len(nodes) == 1:
        # Isolated vertex: represent it with a self-loop.
        return [(nodes[0], nodes[0])]

    edges_list = []
    for i, src in enumerate(nodes):
        # Pair src only with ids that come after it, then add both directions.
        for dst in nodes[i + 1:]:
            edges_list.append((src, dst))
            edges_list.append((dst, src))
    return edges_list
# Build the edge DataFrame ("src", "dst") directly from the RDD of
# (src, dst) tuples rather than collect()-ing all distinct edges to the
# driver first; the edge set can be far larger than the vertex set.
edges = sqlContext.createDataFrame(
    adjacency_list.flatMap(create_edges).distinct(),
    ["src", "dst"],
)
# Assemble the graph from the vertex ("id") and edge ("src", "dst") DataFrames.
g = GraphFrame(vertices, edges)
# A checkpoint directory is set before running connected components;
# here it is the current working directory.
sc.setCheckpointDir(".")
# graphframes uses the same paper we referenced apparently
cc = g.connectedComponents()
# show() prints the table itself and returns None, so it is called directly
# rather than wrapped in a print (which would also emit a stray "None").
cc.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment