abhijangda · April 7, 2019 18:55
diff --git a/anup.py b/anup.py
 # Build a call graph from SAS data, compute Jaccard coefficients for the
 # non-adjacent node pairs, and dump the filtered pairs to a CSV file while
 # timing the string-building, write and flush steps.
 import csv
 import time

 import networkx as nx
 import numpy as np
 import pandas as pd

 df = pd.read_sas("C:\\My Codes\\SNA_New\\testdata3.sas7bdat", format='SAS7BDAT', encoding="ISO-8859-15")
 df1 = pd.read_sas("C:\\My Codes\\SNA_New\\min_nbr_v3.sas7bdat", format='SAS7BDAT', encoding="ISO-8859-15")

 # Rows of df1 as plain Python lists.
 L = df1.values.tolist()

 # Directed graph with one edge per (calling_num, called_num) row of df.
 G5 = nx.from_pandas_edgelist(df, 'calling_num', 'called_num', create_using=nx.DiGraph())
 nx.is_directed(G5)  # return value is unused; kept from the original script
 G6 = G5.to_undirected()
 e = nx.non_edges(G6)  # lazy iterator; not consumed anywhere in this script

 # Every value appearing anywhere in df1, flattened into one set for O(1)
 # membership tests. (The original called set.insert, which does not exist
 # and raised AttributeError; set.add is the correct method.)
 flat_set = {item for sublist in L for item in sublist}

 # jaccard_coefficient yields (u, v, coefficient) lazily; with no explicit
 # pair list it covers the node pairs that are not connected in G6.
 L1 = nx.jaccard_coefficient(G6)
 # Keep only pairs with a positive coefficient ...
 L2 = ((u, v, z) for (u, v, z) in L1 if z > 0)
 # ... where at least one endpoint appears in df1.
 L4 = ((u, v, z) for (u, v, z) in L2 if u in flat_set or v in flat_set)

 start = time.time()

 with open("C:\\My Codes\\SNA_New\\jac_coeff_020419.csv", "w+", newline="\n") as f:
     batch = []  # formatted lines waiting to be written
     start1 = time.time()
     for rec in L4:
         # One "u, v, z" line per record; joined later instead of growing a
         # string with repeated concatenation.
         batch.append(", ".join(map(str, rec)) + "\n")

         if len(batch) == 10:  # Max records to write at a time
             s = "".join(batch)
             end1 = time.time()
             print("string conversion", end1 - start1)
             start2 = time.time()
             f.write(s)
             end2 = time.time()
             print("f.write", end2 - start2)
             start3 = time.time()
             f.flush()
             end3 = time.time()
             print("f.flush", end3 - start3)
             batch = []
             # NOTE(review): this break ends the loop after the first full
             # batch, so at most 10 records ever reach the file. It looks
             # like a leftover from timing a single batch -- confirm before
             # removing it.
             break
     # Write whatever is left (fewer than 10 records, or nothing after break).
     f.write("".join(batch))

 end = time.time()
 print("overall", end - start)
	# Build a call graph from SAS data, compute Jaccard coefficients for the
	# non-adjacent node pairs, and dump the filtered pairs to a CSV file while
	# timing the string-building, write and flush steps.
	import csv
	import time

	import networkx as nx
	import numpy as np
	import pandas as pd

	df = pd.read_sas("C:\\My Codes\\SNA_New\\testdata3.sas7bdat", format='SAS7BDAT', encoding="ISO-8859-15")
	df1 = pd.read_sas("C:\\My Codes\\SNA_New\\min_nbr_v3.sas7bdat", format='SAS7BDAT', encoding="ISO-8859-15")

	# Rows of df1 as plain Python lists.
	L = df1.values.tolist()

	# Directed graph with one edge per (calling_num, called_num) row of df.
	G5 = nx.from_pandas_edgelist(df, 'calling_num', 'called_num', create_using=nx.DiGraph())
	nx.is_directed(G5)  # return value is unused; kept from the original script
	G6 = G5.to_undirected()
	e = nx.non_edges(G6)  # lazy iterator; not consumed anywhere in this script

	# Every value appearing anywhere in df1, flattened into one set for O(1)
	# membership tests. (The original called set.insert, which does not exist
	# and raised AttributeError; set.add is the correct method.)
	flat_set = {item for sublist in L for item in sublist}

	# jaccard_coefficient yields (u, v, coefficient) lazily; with no explicit
	# pair list it covers the node pairs that are not connected in G6.
	L1 = nx.jaccard_coefficient(G6)
	# Keep only pairs with a positive coefficient ...
	L2 = ((u, v, z) for (u, v, z) in L1 if z > 0)
	# ... where at least one endpoint appears in df1.
	L4 = ((u, v, z) for (u, v, z) in L2 if u in flat_set or v in flat_set)

	start = time.time()

	with open("C:\\My Codes\\SNA_New\\jac_coeff_020419.csv", "w+", newline="\n") as f:
	    batch = []  # formatted lines waiting to be written
	    start1 = time.time()
	    for rec in L4:
	        # One "u, v, z" line per record; joined later instead of growing
	        # a string with repeated concatenation.
	        batch.append(", ".join(map(str, rec)) + "\n")

	        if len(batch) == 10:  # Max records to write at a time
	            s = "".join(batch)
	            end1 = time.time()
	            print("string conversion", end1 - start1)
	            start2 = time.time()
	            f.write(s)
	            end2 = time.time()
	            print("f.write", end2 - start2)
	            start3 = time.time()
	            f.flush()
	            end3 = time.time()
	            print("f.flush", end3 - start3)
	            batch = []
	            # NOTE(review): this break ends the loop after the first full
	            # batch, so at most 10 records ever reach the file. It looks
	            # like a leftover from timing a single batch -- confirm
	            # before removing it.
	            break
	    # Write whatever is left (fewer than 10 records, or nothing after break).
	    f.write("".join(batch))

	end = time.time()
	print("overall", end - start)
No results found