Skip to content

Instantly share code, notes, and snippets.

@dcrystalj
Created November 13, 2013 23:35
Show Gist options
  • Save dcrystalj/7458507 to your computer and use it in GitHub Desktop.
Save dcrystalj/7458507 to your computer and use it in GitHub Desktop.
GGGG
from snap import *
import csv
import collections
#helpers
def getNeighbours(G, i):
    """Return the ids of all nodes exactly one hop away from node ``i``.

    The final ``False`` passed to ``GetNodesAtHop`` is SNAP's ``IsDir`` flag,
    so edge direction is ignored while searching.
    """
    one_hop = TIntV()
    GetNodesAtHop(G, i, 1, one_hop, False)
    # Copy the SNAP vector into a plain Python list for the callers.
    return [node_id for node_id in one_hop]
def getNeighboursOfNeigbours(G, i):
    """Return the ids of all nodes exactly two hops away from node ``i``.

    The final ``False`` passed to ``GetNodesAtHop`` is SNAP's ``IsDir`` flag,
    so edge direction is ignored while searching.
    """
    two_hop = TIntV()
    GetNodesAtHop(G, i, 2, two_hop, False)
    # Copy the SNAP vector into a plain Python list for the callers.
    return [node_id for node_id in two_hop]
def isFriend(G, i, j):
    """Tell whether nodes ``i`` and ``j`` are two persons joined by an edge.

    Returns ``False`` as soon as either node's "person" attribute is falsy;
    otherwise returns whatever ``G.IsEdge(i, j)`` reports.
    """
    both_persons = G.GetIntAttrDatN(i, "person") and G.GetIntAttrDatN(j, "person")
    if not both_persons:
        return False
    return G.IsEdge(i, j)
def friendCriteria(i, j):
    """Score a candidate place from 0 to 10 by the share of friends seen there.

    ``i`` is the number of friends who checked in at the place and ``j`` is
    the total number of friends. Each full 3% of the share earns one point,
    capped at 10 points for 30% or more.

    Thresholds are inclusive: exactly 30% yields 10 and exactly 3% yields 1,
    matching the "30% or more" rule this scoring is specified with.

    Returns 0 when no friend was there or when there are no friends at all,
    so the function never divides by zero.
    """
    if i == 0 or j <= 0:
        return 0
    ratio = float(i) / j
    # Lower bounds for 10, 9, ..., 1 points, highest first.
    thresholds = (0.30, 0.27, 0.24, 0.21, 0.18, 0.15, 0.12, 0.09, 0.06, 0.03)
    for points, threshold in zip(range(10, 0, -1), thresholds):
        if ratio >= threshold:
            return points
    return 0
def herdingCriteria(i, j):
    """Score a location from 0 to 10 by its popularity ratio ``i / j``.

    Returns 0 when ``i`` is 0. Otherwise the ratio is compared against
    cut-offs spaced 0.06 apart; the ratio must be strictly greater than a
    cut-off to earn its points (above 0.60 -> 10, above 0.54 -> 9, ...,
    above 0.06 -> 1).
    """
    if i == 0:
        return 0
    share = float(i) / j
    cutoffs = (0.60, 0.54, 0.48, 0.42, 0.36, 0.30, 0.24, 0.18, 0.12, 0.06)
    points = 10
    for cutoff in cutoffs:
        if share > cutoff:
            return points
        points -= 1
    return 0
############
# Check-in CSV to process; alternative datasets are kept commented out below.
# FILENAME = "FoursquareCheckins20110101-20110731.csv"
# FILENAME = "FScheckinstest.csv" # test file with fewer records
FILENAME = "custom.csv" # test file
#Generate GRAPH
def _ensure_node(G, Gf, node_id, is_person):
    """Create ``node_id`` in both graphs and tag it through the "person" attribute.

    Does nothing when ``G`` already contains the node.
    """
    if G.IsNode(node_id):
        return
    flag = 1 if is_person else 0  # 1 = person, 0 = location
    for graph in (G, Gf):
        graph.AddNode(node_id)
        graph.AddIntAttrDatN(node_id, flag, "person")


def generate(FILENAME, FRIENDSFILE="customFriends.csv"):
    """Build the check-in/friendship graphs from CSV files and save them.

    Two ``TNEANet`` graphs are created:

    * ``G``  - persons, locations, check-in edges and friendship edges;
    * ``Gf`` - the same nodes with no edges.

    Parameters:
        FILENAME: check-in CSV with a header row; column 0 holds the user id
            and column 4 the location id.
        FRIENDSFILE: friendship CSV with a header row; columns 0 and 1 hold
            the two user ids of a friendship.

    Person node ids are ``user id + 1`` (positive) and location node ids are
    ``-location id - 2`` (negative), so node ids 0 and -1 are never used and
    the two kinds of node cannot collide.

    Every check-in pair is stored as two directed edges (person -> location
    and location -> person) whose "checkin" attribute counts the visits.

    ``G`` is written to ``FILENAME + '.bin'`` and ``Gf`` to
    ``FILENAME + 'f.bin'``.

    Returns:
        ``[location, count]`` for the location checked into by the most
        distinct persons; ``[0, 1]`` if no location exceeds one visitor.
    """
    G = TNEANet.New()
    Gf = TNEANet.New()
    most_popular_loc = [0, 1]

    # Check-ins: add person/location nodes and the edges between them.
    with open(FILENAME) as checkins:
        rows = csv.reader(checkins, dialect='excel')
        next(rows, None)  # skip the header row (tolerates an empty file)
        for row in rows:
            if not row:  # blank line in the CSV
                continue
            # Shift the ids to avoid node ids 0 and -1.
            person = int(row[0]) + 1
            location = int(row[4]) * -1 - 2

            _ensure_node(G, Gf, person, True)
            _ensure_node(G, Gf, location, False)

            if not G.IsEdge(person, location):
                # First visit: one edge per direction, each counting 1.
                for src, dst in ((person, location), (location, person)):
                    G.AddIntAttrDatE(G.AddEdge(src, dst), 1, "checkin")
            else:
                # Repeat visit: bump the counter stored on this pair's own
                # edges (not on whichever edge happened to be created last).
                EId1 = G.GetEId(person, location)
                EId2 = G.GetEId(location, person)
                EVal = G.GetIntAttrDatE(EId1, "checkin") + 1
                G.AddIntAttrDatE(EId1, EVal, "checkin")
                G.AddIntAttrDatE(EId2, EVal, "checkin")

            # A location has one outgoing edge per distinct visitor, so its
            # out-degree is its popularity.
            outDeg = G.GetNI(location).GetOutDeg()
            if most_popular_loc[1] < outDeg:
                most_popular_loc = [location, outDeg]

    # Friendships: one edge per direction between the two persons.
    with open(FRIENDSFILE) as friendships:
        rows = csv.reader(friendships, dialect='excel')
        next(rows, None)  # skip the header row
        for row in rows:
            if not row:
                continue
            first = int(row[0]) + 1
            second = int(row[1]) + 1
            # Users that never checked in are still persons; mark them
            # explicitly instead of relying on the attribute's default value.
            _ensure_node(G, Gf, first, True)
            _ensure_node(G, Gf, second, True)
            G.AddEdge(first, second)
            G.AddEdge(second, first)

    #SaveEdgeList(G, "FSGraph.g")
    FOut = TFOut(FILENAME + '.bin')
    G.Save(FOut)
    FOut.Flush()
    FOut = TFOut(FILENAME + 'f.bin')
    Gf.Save(FOut)
    FOut.Flush()
    return most_popular_loc
# generate() rebuilds the graph files and returns [location, max checkins] for
# the busiest location. Keep that result: the herding ratio computed further
# down divides by most_popular_loc[1], and the former placeholder [0, 1] made
# every visited location reach the maximum score.
most_popular_loc = generate(FILENAME)  # [location, max checkins]
print(most_popular_loc)

# Reload both graphs from the files generate() just wrote.
FIn = TFIn(FILENAME + '.bin')
G = TNEANet.Load(FIn)
FInf = TFIn(FILENAME + 'f.bin')
Gf = TNEANet.Load(FInf)
#G = LoadEdgeList(PNEANet, "FSGraph.g", 0, 1)
# Message is Slovenian for "number of nodes ... edges"; the output text is unchanged.
print("stevilo vozlisc %s edgov %s" % (G.GetNodes(), G.GetEdges()))

# Friendship rule: if 30% or more of a person's friends were at a location,
# the person-location connection gets all 10 points; 27% gives 9, 24% gives 8,
# ... and 3% gives 1 point (one point roughly means "at least some friend was
# there").
# possibleFriendConnections = collections.defaultdict(lambda: [])
nodes = [node.GetId() for node in G.Nodes()]
# A node is a person when its "person" attribute is truthy; all others are locations.
persons = [node_id for node_id in nodes if G.GetIntAttrDatN(node_id, "person")]
locations = list(set(nodes) - set(persons))
print("friendships")
# For every person, look at the locations two hops away (places reached
# through a direct neighbour) and score each one by the share of the person's
# friends connected to it. Positive scores become a pair of "checkin" edges in Gf.
for nodeid in persons:
    direct = getNeighbours(G, nodeid)  # every node adjacent to this person
    friends = [n for n in direct if G.GetIntAttrDatN(n, "person")]
    second_hop = getNeighboursOfNeigbours(G, nodeid)
    candidate_places = [n for n in second_hop if not G.GetIntAttrDatN(n, "person")]

    for place in candidate_places:
        # How many of this person's friends are linked to the place.
        friends_there = sum(1 for friend in friends if G.IsEdge(place, friend))
        # Score from 0 to 10 for suggesting this place to the person.
        placeScore = friendCriteria(friends_there, len(friends))
        if placeScore <= 0:
            continue
        # Store the score on a new edge in each direction.
        forward = Gf.AddEdge(nodeid, place)
        backward = Gf.AddEdge(place, nodeid)
        Gf.AddIntAttrDatE(forward, placeScore, "checkin")
        Gf.AddIntAttrDatE(backward, placeScore, "checkin")
print("herding")
# Herding: score every location by its number of neighbours relative to the
# most popular location's count, then suggest it (via a pair of "herd" edges
# in Gf) to every person who has no edge to it in G.
max_checkins = most_popular_loc[1]
for location in locations:
    visitors = getNeighbours(G, location)  # every node adjacent to the location
    score = herdingCriteria(len(visitors), max_checkins)
    if score <= 0:
        continue
    for person in persons:
        if G.IsEdge(location, person):
            continue  # already connected, nothing to suggest
        EId1 = Gf.AddEdge(location, person)
        EId2 = Gf.AddEdge(person, location)
        # Store this location's herding score (not the value left over from
        # the friendship loop).
        Gf.AddIntAttrDatE(EId1, score, "herd")
        Gf.AddIntAttrDatE(EId2, score, "herd")
# NOTE(review): Gf is not written back to disk anywhere in the visible code -
# confirm whether the scored graph should be saved here.
#print possibleFriendConnections.values()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment