Created
November 13, 2013 23:35
-
-
Save dcrystalj/7458507 to your computer and use it in GitHub Desktop.
GGGG
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from snap import * | |
| import csv | |
| import collections | |
| #helpers | |
def getNeighbours(G, i):
    """Return the ids of all nodes exactly one hop away from node ``i``.

    snap's ``GetNodesAtHop`` fills a ``TIntV`` with the matching node ids; the
    vector is copied into a plain Python list before it is returned.  The
    trailing ``False`` is passed as snap's ``IsDir`` flag, i.e. edge direction
    is ignored (per the Snap.py reference for ``GetNodesAtHop``).
    """
    hop_one = TIntV()
    GetNodesAtHop(G, i, 1, hop_one, False)
    return [node_id for node_id in hop_one]
def getNeighboursOfNeigbours(G, i):
    """Return the ids of all nodes exactly two hops away from node ``i``.

    Same mechanics as a one-hop lookup, but ``GetNodesAtHop`` is asked for hop
    distance 2.  The trailing ``False`` is passed as snap's ``IsDir`` flag,
    i.e. edge direction is ignored (per the Snap.py reference).
    """
    hop_two = TIntV()
    GetNodesAtHop(G, i, 2, hop_two, False)
    return [node_id for node_id in hop_two]
def isFriend(G, i, j):
    """Tell whether person ``i`` has an edge to person ``j``.

    Returns ``False`` as soon as the "person" attribute of either node is
    falsy (``j`` is only inspected when ``i`` passes); otherwise returns
    whatever ``G.IsEdge(i, j)`` reports.
    """
    both_are_persons = (
        G.GetIntAttrDatN(i, "person") and G.GetIntAttrDatN(j, "person")
    )
    if not both_are_persons:
        return False
    return G.IsEdge(i, j)
def friendCriteria(i, j):
    """Score the share ``i / j`` on a 0-10 scale in steps of 3 percentage points.

    The rule this scoring implements is "30% or more -> 10 points, 27% -> 9,
    24% -> 8, ... 3% -> 1 point", so every lower bound is *inclusive*.  The
    previous implementation compared with a strict ``>``, which scored every
    exact boundary one point too low (e.g. 3 of 10 gave 9, 3 of 100 gave 0).

    Args:
        i: Numerator of the share (e.g. number of friends seen at a place).
        j: Denominator of the share (e.g. total number of friends).

    Returns:
        An int from 0 to 10.  0 is returned when ``i`` is 0, when ``j`` is 0
        (nothing to take a share of, and it avoids a ZeroDivisionError), or
        when the share is below 3%.
    """
    if i == 0 or j == 0:
        return 0
    ratio = float(i) / j
    # Literal bounds (rather than computed multiples of 0.03) so that a share
    # sitting exactly on a boundary compares equal to it in floating point.
    lower_bounds = (0.30, 0.27, 0.24, 0.21, 0.18, 0.15, 0.12, 0.09, 0.06, 0.03)
    for points, bound in zip(range(10, 0, -1), lower_bounds):
        if ratio >= bound:
            return points
    return 0
def herdingCriteria(i, j):
    """Score the share ``i / j`` on a 0-10 scale in steps of 0.06.

    A share strictly above 0.60 scores 10, strictly above 0.54 scores 9, and
    so on down to strictly above 0.06 scoring 1; anything else scores 0.
    ``i == 0`` short-circuits to 0 before any division takes place.
    """
    if i == 0:
        return 0
    share = float(i) / j
    # Bounds listed from the highest score (10) down to the lowest (1).
    strict_bounds = (0.60, 0.54, 0.48, 0.42, 0.36, 0.30, 0.24, 0.18, 0.12, 0.06)
    for points, bound in zip(range(10, 0, -1), strict_bounds):
        if share > bound:
            return points
    return 0
############
# Check-in CSV that generate() reads.  The commented-out assignments are
# alternative inputs; exactly one FILENAME assignment should be active.
# FILENAME = "FoursquareCheckins20110101-20110731.csv"
# FILENAME = "FScheckinstest.csv" # test file with fewer records
FILENAME = "custom.csv" # test file
# Generate GRAPH
def _ensure_node(G, Gf, node_id, is_person):
    """Add ``node_id`` to both graphs, tagged via "person", unless G has it."""
    if G.IsNode(node_id):
        return
    for graph in (G, Gf):
        graph.AddNode(node_id)
        graph.AddIntAttrDatN(node_id, is_person, "person")  # 1 = true, 0 = false


def generate(FILENAME):
    """Build the check-in/friendship graph plus a node-only twin and save both.

    Reads check-ins from the CSV ``FILENAME`` (first line skipped as a header;
    column 0 is used as the person id, column 4 as the location id) and
    friendships from "customFriends.csv" (first line skipped; columns 0 and 1
    are the two person ids).

    Two ``TNEANet`` graphs are produced:

    * ``G``  -- persons, locations, check-in edges in both directions carrying
      a "checkin" counter, and friendship edges in both directions.  Saved to
      ``FILENAME + '.bin'``.
    * ``Gf`` -- the same nodes with their "person" attribute but no edges.
      Saved to ``FILENAME + 'f.bin'``.

    Returns:
        ``[location_id, out_degree]`` of the location with the largest
        out-degree seen while reading check-ins; stays at the initial
        ``[0, 1]`` when no location exceeds an out-degree of 1.
    """
    G = TNEANet.New()
    Gf = TNEANet.New()
    most_popular_loc = [0, 1]

    # read check-ins; the file is closed as soon as the loop is done
    with open(FILENAME) as checkin_file:
        rows = csv.reader(checkin_file, dialect='excel')
        next(rows, None)  # skip first line (tolerates an empty file)
        for row in rows:
            # avoid node ids 0 and -1: persons become >= 1 (for non-negative
            # raw ids), locations become <= -2
            person = int(row[0]) + 1
            location = -int(row[4]) - 2

            # add missing nodes
            _ensure_node(G, Gf, person, 1)
            _ensure_node(G, Gf, location, 0)
            # to get a string value from a node:
            #   val = TStrV()
            #   G.StrAttrValueNI(NodeId, val)
            #   val[0]

            # add connections
            if not G.IsEdge(person, location):
                for src, dst in ((person, location), (location, person)):
                    G.AddIntAttrDatE(G.AddEdge(src, dst), 1, "checkin")
            else:
                # Repeated check-in at the same place: bump the counter on
                # both directions.  The counter is read from THIS pair's edge
                # (the old code read it through a stale edge id left over
                # from an earlier iteration).
                forward = G.GetEId(person, location)
                backward = G.GetEId(location, person)
                visits = G.GetIntAttrDatE(forward, "checkin") + 1
                G.AddIntAttrDatE(forward, visits, "checkin")
                G.AddIntAttrDatE(backward, visits, "checkin")

            # update most popular location
            outDeg = G.GetNI(location).GetOutDeg()
            if most_popular_loc[1] < outDeg:
                most_popular_loc = [location, outDeg]

    # add friends
    # FRIENDSFILE = "FoursquareFriendship.csv"
    FRIENDSFILE = "customFriends.csv"
    with open(FRIENDSFILE) as friends_file:
        rows = csv.reader(friends_file, dialect='excel')
        next(rows, None)  # skip first line
        for row in rows:
            first = int(row[0]) + 1
            second = int(row[1]) + 1
            # Persons that only appear in the friendship file are tagged as
            # persons too, exactly like the ones created from check-ins.
            _ensure_node(G, Gf, first, 1)
            _ensure_node(G, Gf, second, 1)
            G.AddEdge(first, second)
            G.AddEdge(second, first)

    # SaveEdgeList(G, "FSGraph.g")
    FOut = TFOut(FILENAME + '.bin')
    G.Save(FOut)
    FOut.Flush()
    FOut = TFOut(FILENAME + 'f.bin')
    Gf.Save(FOut)
    FOut.Flush()
    return most_popular_loc
# Build both graphs, write them to disk and keep the busiest location that
# generate() reports: [location id, out-degree].  The herding step below
# normalises against that out-degree, so the real value is used instead of a
# hard-coded placeholder.  If generate() is skipped because the .bin files
# already exist, assign a previously printed value here instead, e.g.
# most_popular_loc = [-81, 33]  # location, max checkins
most_popular_loc = generate(FILENAME)
print(most_popular_loc)

FIn = TFIn(FILENAME + '.bin')
G = TNEANet.Load(FIn)
FInf = TFIn(FILENAME + 'f.bin')
Gf = TNEANet.Load(FInf)
# G = LoadEdgeList(PNEANet, "FSGraph.g", 0, 1)
print("stevilo vozlisc %s edgov %s" % (G.GetNodes(), G.GetEdges()))

# Friendship rule: if 30% or more of a person's friends were at some
# location, the edge between that person and the location gets all 10 points;
# 27% -> 9, 24% -> 8, ... 3% -> 1 point (alternatively, one point could simply
# mean that at least one friend was there).
nodes = [node.GetId() for node in G.Nodes()]
persons = [node_id for node_id in nodes if G.GetIntAttrDatN(node_id, "person")]
person_ids = set(persons)
locations = [node_id for node_id in nodes if node_id not in person_ids]

print("friendships")
for nodeid in persons:
    neigbours = getNeighbours(G, nodeid)  # all direct connections
    # friends of the user: the direct neighbours that are persons
    friends = [n for n in neigbours if G.GetIntAttrDatN(n, "person")]
    neighboursOfNeigbours = getNeighboursOfNeigbours(G, nodeid)
    # places at the second hop, i.e. places reached through the user's friends
    possible_new_places = [
        n for n in neighboursOfNeigbours if not G.GetIntAttrDatN(n, "person")
    ]

    for place in possible_new_places:
        # count the friends connected to the candidate place
        friendCheckin = sum(1 for friend in friends if G.IsEdge(place, friend))
        # score for the candidate place, from 0 to 10
        placeScore = friendCriteria(friendCheckin, len(friends))
        # store the score on a new pair of edges in the recommendation graph
        if placeScore > 0:
            EId1 = Gf.AddEdge(nodeid, place)
            EId2 = Gf.AddEdge(place, nodeid)
            Gf.AddIntAttrDatE(EId1, placeScore, "checkin")
            Gf.AddIntAttrDatE(EId2, placeScore, "checkin")
            # A key/value alternative would be a
            # collections.defaultdict(list) mapping nodeid -> [place, score].

print("herding")
for i in locations:
    neigbours = getNeighbours(G, i)  # all direct connections of the location
    score = herdingCriteria(len(neigbours), most_popular_loc[1])
    if score > 0:
        # connect the location to every person it is not yet connected to in G
        for j in persons:
            if not G.IsEdge(i, j):
                EId1 = Gf.AddEdge(i, j)
                EId2 = Gf.AddEdge(j, i)
                # Store the herding score of THIS location; the old code wrote
                # the stale placeScore left over from the friendship loop.
                Gf.AddIntAttrDatE(EId1, score, "herd")
                Gf.AddIntAttrDatE(EId2, score, "herd")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment