Created
January 20, 2019 18:07
-
-
Save MLWhiz/998b8420a31bb00ec52185f36cc131e8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from itertools import combinations

from graphframes import *
def vertices(line):
    """Parse one adjacency-list line into a list of integer node ids.

    Args:
        line: Text holding whitespace-separated integer ids.

    Returns:
        The ids in the order they appear on the line; an empty list for a
        blank line.

    Raises:
        ValueError: If any token is not a valid integer literal.
    """
    # split() with no argument collapses runs of whitespace and ignores
    # leading/trailing blanks, so "1  2 " no longer produces an empty token
    # that int() would reject.
    return [int(token) for token in line.split()]
# Flatten every line into node ids, de-duplicate them, and pull the result back
# to the driver. (adjacency_list is defined outside this snippet; presumably an
# RDD of text lines -- confirm.)
unique_node_ids = adjacency_list.flatMap(vertices).distinct().collect()
# From here on `vertices` no longer names the parser function: it is rebound to
# a one-column ("id") DataFrame with one row per distinct node.
vertices = sqlContext.createDataFrame([[node_id] for node_id in unique_node_ids], ["id"])
def create_edges(line):
    """Build the edges implied by one adjacency-list line.

    Every pair of ids on the line is linked in both directions. A line with a
    single id yields one self-loop so the isolated node still appears in the
    edge set.

    Args:
        line: Text holding whitespace-separated integer ids.

    Returns:
        A list of ``(src, dst)`` integer tuples; empty for a blank line.

    Raises:
        ValueError: If any token is not a valid integer literal.
    """
    # split() with no argument tolerates repeated, leading and trailing
    # whitespace; split(" ") produced empty tokens that int() rejected.
    nodes = [int(token) for token in line.split()]
    if len(nodes) == 1:
        return [(nodes[0], nodes[0])]
    edges_list = []
    # combinations() walks position pairs (i < j) in the same order as the
    # nested index loops it replaces, so the output order is unchanged.
    for src, dst in combinations(nodes, 2):
        edges_list.append((src, dst))
        edges_list.append((dst, src))
    return edges_list
# Expand every line into its edge pairs, de-duplicate them, and collect the
# result to the driver before turning it into a ("src", "dst") DataFrame.
edge_pairs = adjacency_list.flatMap(create_edges).distinct().collect()
edges = sqlContext.createDataFrame(edge_pairs, ["src", "dst"])
g = GraphFrame(vertices, edges)
# A checkpoint directory is set before running connectedComponents();
# "." is the current working directory.
sc.setCheckpointDir(".")
# graphframes apparently uses the same paper referenced earlier
cc = g.connectedComponents()
# show() prints the table itself and returns None, so wrapping it in `print`
# only added a stray "None" line (and was Python 2-only syntax).
cc.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment