Skip to content

Instantly share code, notes, and snippets.

@abhijangda
Last active April 7, 2019 18:55
Show Gist options
  • Save abhijangda/e6b0295d4afb0f3f916fd642f374e2e2 to your computer and use it in GitHub Desktop.
Save abhijangda/e6b0295d4afb0f3f916fd642f374e2e2 to your computer and use it in GitHub Desktop.
# Import the data and test the network graph on a larger number of records.
import networkx as nx
import pandas as pd
import numpy as np
import csv
import time

# Script overview: load call records from SAS files, build an undirected
# graph of calling/called numbers, compute Jaccard coefficients for node
# pairs that are not yet connected, keep the pairs with a positive score that
# touch at least one value listed in the second table, and time how long it
# takes to format, write and flush the first batch of results to a CSV file.

BATCH_SIZE = 10  # Max records to write at a time

# Edge list: one row per (calling_num, called_num) pair.
df = pd.read_sas("C:\\My Codes\\SNA_New\\testdata3.sas7bdat", format='SAS7BDAT', encoding="ISO-8859-15")
# Table whose cell values select which node pairs are kept below.
df1 = pd.read_sas("C:\\My Codes\\SNA_New\\min_nbr_v3.sas7bdat", format='SAS7BDAT', encoding="ISO-8859-15")
L = df1.values.tolist()

# Build the directed call graph first, then drop edge direction:
# nx.jaccard_coefficient is not implemented for directed graphs.
G5 = nx.from_pandas_edgelist(df, 'calling_num', 'called_num', create_using=nx.DiGraph())
G6 = G5.to_undirected()

# Every value that appears in any cell of df1, for O(1) membership tests.
# (The original called set.insert, which does not exist; sets use add.)
flat_set = {item for sublist in L for item in sublist}

# All three stages are lazy generators; nothing is computed until the write
# loop below pulls records. With no explicit pair list, jaccard_coefficient
# scores every non-existent edge of G6.
L1 = nx.jaccard_coefficient(G6)
L2 = ((u, v, z) for (u, v, z) in L1 if z > 0)
L4 = ((u, v, z) for (u, v, z) in L2 if u in flat_set or v in flat_set)

# perf_counter is used for all intervals: it is monotonic and has much finer
# resolution than time.time(), which matters for the sub-millisecond
# write/flush measurements.
start = time.perf_counter()
with open("C:\\My Codes\\SNA_New\\jac_coeff_020419.csv", "w+", newline="\n") as f:
    lines = []
    start1 = time.perf_counter()
    for rec in L4:
        u, v, z = rec
        lines.append(str(u) + ", " + str(v) + ", " + str(z) + "\n")
        if len(lines) == BATCH_SIZE:
            batch = "".join(lines)
            end1 = time.perf_counter()
            # Because L4 is lazy, this interval also includes the time spent
            # computing and filtering the records, not only formatting them.
            print("string conversion", end1 - start1)

            start2 = time.perf_counter()
            f.write(batch)
            end2 = time.perf_counter()
            print("f.write", end2 - start2)

            start3 = time.perf_counter()
            f.flush()
            end3 = time.perf_counter()
            print("f.flush", end3 - start3)

            lines = []
            # NOTE(review): the original indentation was lost; this break is
            # assumed to belong to the batch branch, so the script stops
            # after timing the first full batch. Remove it to write all
            # records.
            break
    # Write whatever is left over (only non-empty when fewer than
    # BATCH_SIZE records were produced in total).
    f.write("".join(lines))
end = time.perf_counter()
print("overall", end - start)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment