Last active
March 19, 2017 09:03
-
-
Save AlphaSheep/7102b2dfaa5030465fcfbc0279a538f4 to your computer and use it in GitHub Desktop.
Cubing degrees of separation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
import json | |
import time | |
def tic(): | |
tic.start = time.time() | |
def toc(): | |
print('Time elapsed:', round(time.time() - tic.start, 3),'s\n') | |
return (time.time() - tic.start) | |
def save(var, varName): | |
with open(varName+'.json','w') as saveFile: | |
json.dump(var, saveFile) | |
def load(varName): | |
with open(varName+'.json') as loadFile: | |
result = json.load(loadFile) | |
return result | |
tic() | |
print('Loading Distance Matrix') | |
compsList = load('compsList') | |
degrees = load('degrees') | |
toc() | |
for x in range(6): | |
compCount = 0 | |
for i in range(len(degrees)): | |
for j in range(len(degrees[i])): | |
if degrees[i][j] == x: | |
compCount += 1 | |
print('Distance: {:.0f} Number of competition combinations: {:8.0f}, ({:6.2f} %)'.format(x, compCount, compCount/(len(compsList)*(len(compsList)-1))*100)) | |
toc() | |
maxDegree = max([max(d) for d in degrees]) | |
print('Maximum degree of separation: {}'.format(maxDegree)) | |
print('Searching for longest chains') | |
longest = 5 | |
chainIndexes = [] | |
for i in range(len(degrees)-1): | |
for j in range(i+1, len(degrees[i])): | |
# for i in range(len(degrees)): | |
# for j in range(len(degrees[i])): | |
if degrees[i][j] >= longest: | |
chainIndexes.append((i,j)) | |
print('Distance:', degrees[i][j], '\t', i, compsList[i], '-->', compsList[j], j) | |
if degrees[i][j] == 0 and not i==j: | |
print(degrees[i][j], i, compsList[i], '-->', compsList[j], j, sep='\t') | |
save(chainIndexes, 'chainIndexes') | |
toc() | |
print('Longest Chains') | |
chains = [] | |
for i,j in chainIndexes: | |
#for i,j in [(2936,2937)]: | |
thisChain = [] #[[] for _ in range(maxDegree+1)] | |
for dist in range(maxDegree+1): | |
for link in range(len(degrees)): | |
if degrees[i][link] == dist and degrees[j][link] == maxDegree-dist: | |
thisChain.append(link) | |
break | |
chains.append(thisChain) | |
print(compsList[i], '-->', compsList[j]) | |
print(' ('+' -- '.join([compsList[link] for link in thisChain])+')') | |
print() | |
print() | |
save(chains,'chains') | |
toc() | |
print('Maximum distance from other competitions') | |
maxdistances = [max(d) for d in degrees] | |
for i in range(1,6): | |
count = sum([1 for d in maxdistances if d == i]) | |
print ('Distance: {:.0f}\t Number of competitions: {:.0f}'.format(i, count)) | |
toc() | |
print('Average distance from other competitions') | |
averagedistances = [sum(d)/len(d) for d in degrees] | |
averagedistances = [(i, averagedistances[i]) for i in range(len(averagedistances))] | |
averagedistances.sort(key=lambda x: x[1]) | |
for i in range(20): | |
x, dist = averagedistances[i] | |
print(i+1,'.\t', compsList[x],' '*(30-len(compsList[x])), 'Average:', round(dist,3) ,' \tMax:',maxdistances[x]) | |
print('...') | |
for i in range(len(averagedistances)-20, len(averagedistances)): | |
x, dist = averagedistances[i] | |
print(i+1,'.\t', compsList[x],' '*(30-len(compsList[x])), 'Average:', round(dist,3) ,' \tMax:',maxdistances[x]) | |
toc() | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
import json | |
import time | |
import math | |
def tic(): | |
tic.start = time.time() | |
def toc(): | |
print('Time elapsed:', round(time.time() - tic.start, 3),'s\n') | |
return (time.time() - tic.start) | |
def tocx(): | |
return (time.time() - tic.start) | |
def save(var, varName): | |
with open(varName+'.json','w') as saveFile: | |
json.dump(var, saveFile) | |
def load(varName): | |
with open(varName+'.json') as loadFile: | |
result = json.load(loadFile) | |
return result | |
tic() | |
print('Loading Distance Matrix') | |
compsList = load('compsList') | |
degrees = load('degrees') | |
compsIndex = {compsList[i]: i for i in range(len(compsList))} | |
nCompConnections = [sum([1 for d in dists if d == 1]) for dists in degrees] | |
toc() | |
print('Loading competition attendance data') | |
compsAttended = load('compsAttended') | |
compReg = load('compReg') | |
toc() | |
print('Assigning competitions to competitors') | |
compsPerPerson = [] | |
people = [] | |
for p in sorted(compsAttended.keys()): | |
people.append(p) | |
compsPerPerson.append([]) | |
for comp in compsAttended[p]: | |
compsPerPerson[-1].append(compsIndex[comp]) | |
print('Number of people:', len(people)) | |
toc() | |
print('Assigning competitors to competitions') | |
peoplePerComp = [] | |
peopleIndex = {people[i]: i for i in range(len(people))} | |
for c in compsList: | |
peoplePerComp.append([]) | |
for p in compReg[c]: | |
peoplePerComp[-1].append(peopleIndex[p]) | |
toc() | |
print('Grouping people by identical comp attendance') | |
i = 0 | |
peopleGroupedDict = {} | |
compsPerPeopleGroup = {} | |
for p in range(len(people)): | |
compsid = str(sorted(compsPerPerson[p])) | |
if not compsid in peopleGroupedDict.keys(): | |
peopleGroupedDict[compsid] = [] | |
compsPerPeopleGroup[compsid] = sorted(compsPerPerson[p], key = lambda c: -nCompConnections[c]) | |
peopleGroupedDict[compsid].append(people[p]) | |
compCombos = sorted(peopleGroupedDict.keys()) | |
nCombos = len(compCombos) | |
print(nCombos, 'unique comp combinations') | |
save(compCombos, 'compCombos') | |
save(peopleGroupedDict, 'peopleGroupedDict') | |
save(compsPerPeopleGroup, 'compsPerPeopleGroup') | |
peopleGroups = [] | |
compsLookup = [] | |
for pg in compCombos: | |
peopleGroups.append(peopleGroupedDict[pg]) | |
compsLookup.append(compsPerPeopleGroup[pg]) | |
toc() | |
print('Average distance per competitor') | |
avgDistByPeople = [] | |
def timef(s): | |
s = round(s) | |
return '{:.0f}:{:02.0f}:{:02.0f}'.format(math.floor(s/3600), math.floor(s/60)%60, s%60) | |
counter = 0 | |
total = len(peopleGroups)*len(peopleGroups) | |
ttotal = 6000 | |
peopleDistanceCounts = [] | |
try: | |
peopleDistanceCounts = load('peopleDistanceCounts') | |
avgDistByPeople = load('avgDistByPeople') | |
except FileNotFoundError: | |
for pgrp1 in range(len(peopleGroups)): | |
theseDists = [0 for _ in range(6)] | |
for pgrp2 in range(len(peopleGroups)): | |
if pgrp1 == pgrp2: | |
theseDists[0] += len(peopleGroups[pgrp2]) | |
continue | |
key = (min(pgrp1, pgrp2), max(pgrp1, pgrp2)) | |
dist = 5 | |
for c1 in compsLookup[pgrp1]: | |
if c1 in compsLookup[pgrp2]: | |
dist = 0 | |
break | |
else: | |
for c1 in compsLookup[pgrp1]: | |
for c2 in compsLookup[pgrp2]: | |
if degrees[c1][c2] < dist: | |
dist = degrees[c1][c2] | |
if dist == 1: | |
break | |
if dist == 1: | |
break | |
theseDists[dist] += len(peopleGroups[pgrp2]) | |
counter += 1 | |
if not counter%1000000: | |
ttotal = tpassed/fractionComplete | |
if not counter%200000: | |
fractionComplete = counter/total | |
tpassed = tocx() | |
tremain = ttotal - tpassed | |
print(round(fractionComplete*100,3), '%\t', | |
timef(tpassed), 'passed', timef(ttotal), 'total', timef(tremain), 'remaining. ', | |
'Rng', (round(min(avgDistByPeople),3), round(max(avgDistByPeople),3))) | |
peopleDistanceCounts.append(theseDists) | |
avgDistByPeople.append(sum([i*theseDists[i] for i in range(6)])/sum(theseDists)) | |
save(peopleDistanceCounts, 'peopleDistanceCounts') | |
save(avgDistByPeople, 'avgDistByPeople') | |
print('Range:', (round(min(avgDistByPeople),3), round(max(avgDistByPeople),3))) | |
toc() | |
print('Expanding counts to individual competitors') | |
distByPeople = {} | |
for pg in range(len(peopleGroups)): | |
for p in peopleGroups[pg]: | |
distByPeople[p] = peopleDistanceCounts[pg] | |
toc() | |
print('Competitors sorted by maximum distance') | |
maxDists = [sum([1 for d in distByPeople[p] if d > 0])-1 for p in distByPeople.keys()] | |
for i in range(1,6): | |
count = sum([1 for d in maxDists if d == i]) | |
print ('Distance: {:.0f}\t Number of people: {:.0f}'.format(i, count)) | |
toc() | |
print('Competitors sorted by average distance') | |
sortedPeople = sorted(distByPeople.keys(), key = lambda p: sum([i*distByPeople[p][i] for i in range(6)])/sum(distByPeople[p])) | |
for p in range(50): | |
avgDist = sum([i*distByPeople[sortedPeople[p]][i] for i in range(6)])/sum(distByPeople[sortedPeople[p]]) | |
print(p+1, '.\t', sortedPeople[p], ' Average: ', round(avgDist, 3), '\tDistribution: ', distByPeople[sortedPeople[p]], sep='') | |
print('...') | |
for p in range(len(sortedPeople)-20, len(sortedPeople)): | |
avgDist = sum([i*distByPeople[sortedPeople[p]][i] for i in range(6)])/sum(distByPeople[sortedPeople[p]]) | |
print(p+1, '.\t', sortedPeople[p], ' Average: ', round(avgDist, 3), '\tDistribution: ', distByPeople[sortedPeople[p]], sep='') | |
toc() | |
print("Competitors sorted by number of people they've been to competitions with") | |
sortedPeople = sorted(distByPeople.keys(), key = lambda p: -distByPeople[p][0]) | |
for p in range(50): | |
avgDist = sum([i*distByPeople[sortedPeople[p]][i] for i in range(6)])/sum(distByPeople[sortedPeople[p]]) | |
print(p+1, '.\t', sortedPeople[p], ' Average: ', round(avgDist, 3), '\tDistribution: ', distByPeople[sortedPeople[p]], sep='') | |
print('...') | |
for p in range(len(sortedPeople)-20, len(sortedPeople)): | |
avgDist = sum([i*distByPeople[sortedPeople[p]][i] for i in range(6)])/sum(distByPeople[sortedPeople[p]]) | |
print(p+1, '.\t', sortedPeople[p], ' Average: ', round(avgDist, 3), '\tDistribution: ', distByPeople[sortedPeople[p]], sep='') | |
toc() | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
import json | |
import time | |
import mysql.connector | |
def tic(): | |
tic.start = time.time() | |
def toc(): | |
print('Time elapsed:', round(time.time() - tic.start, 3),'s\n') | |
return (time.time() - tic.start) | |
def save(var, varName): | |
with open(varName+'.json','w') as saveFile: | |
json.dump(var, saveFile) | |
def load(varName): | |
with open(varName+'.json') as loadFile: | |
result = json.load(loadFile) | |
return result | |
tic() | |
conn = mysql.connector.connect(host = "localhost",user="root",passwd="root", db="mysql") | |
cursor = conn.cursor() | |
# print('Creating Competitor Table...') | |
# cursor.execute('DROP TABLE Competitors;') | |
# cursor.execute('CREATE TABLE Competitors AS SELECT DISTINCT PersonId, CompetitionId FROM Results;') | |
# toc() | |
print('Finding competitions...') | |
cursor.execute('SELECT DISTINCT CompetitionId FROM Competitors;') | |
compsList = [c[0] for c in cursor.fetchall()] | |
save(compsList, 'compsList') | |
toc() | |
print('Fetching Competitor Attendance...') | |
counter = 0 | |
try: | |
compRegCount = load('compRegCount') | |
compReg = load('compReg') | |
except FileNotFoundError: | |
compRegCount = {} | |
compReg = {} | |
for comp in compsList: | |
if comp not in compReg.keys(): | |
cursor.execute('SELECT PersonId FROM Competitors WHERE CompetitionId=%s;', (comp,)) | |
compReg[comp] = [p[0] for p in cursor.fetchall()] | |
compRegCount[comp] = len(compReg[comp]) | |
counter += 1 | |
if not counter%1000: | |
print('{:.0f}/{:.0f} ({:.2f} %)'.format(counter, len(compsList), counter/len(compsList)*100)) | |
toc() | |
print('Saving...') | |
save(compReg, 'compReg') | |
save(compRegCount, 'compRegCount') | |
toc() | |
compsAttended = {} | |
counter = 0 | |
print('Fetching Competitions per Person...') | |
for comp in compsList: | |
for person in compReg[comp]: | |
if not person in compsAttended.keys(): | |
compsAttended[person] = [] | |
compsAttended[person].append(comp) | |
counter += 1 | |
if not counter%1000: | |
print('{:.0f}/{:.0f} ({:.2f} %)'.format(counter, len(compsList), counter/len(compsList)*100)) | |
save(compsAttended, 'compsAttended') | |
toc() | |
def isFirstConnectionByPerson(comp1, comp2): | |
for person in compReg[comp1]: | |
if person in compReg[comp2]: | |
return True | |
return False | |
def isFirstConnection(n1, n2): | |
return n2 in connections[n1] | |
print('Finding Connections...') | |
try: | |
connections = load('connections') | |
except FileNotFoundError: | |
connections = [[] for _ in compsList] | |
counter = 1 | |
for i in range(len(compsList)-1): | |
for j in range(i+1, len(compsList)): | |
if isFirstConnection(compsList[i], compsList[j]): | |
connections[i].append(j) | |
connections[j].append(i) | |
counter += 1 | |
if not counter%50000: | |
print('{:.0f}/{:.0f} ({:.2f} %)'.format(counter, len(compsList)*len(compsList)/2, counter/(len(compsList)*len(compsList)/2)*100)) | |
save(connections, 'connections') | |
nConnections = [len(c) for c in connections] | |
toc() | |
# print('Checking first degree connectedness...') | |
# try: | |
# firstdegrees = load('firstdegrees') | |
# except FileNotFoundError: | |
# connections = load('connections') | |
# counter = 0 | |
# firstdegrees = [[0 for _ in range(len(compsList))] for _ in range(len(compsList))] | |
# for i in range(len(compsList)-1): | |
# for j in range(i+1, len(compsList)): | |
# if isFirstConnection(i, j): | |
# firstdegrees[i][j] = 1 | |
# firstdegrees[j][i] = 1 | |
# counter += 1 | |
# if not counter%200000: | |
# print('{:.0f}/{:.0f} ({:.2f} %)'.format(counter, len(compsList)*(len(compsList)-1)/2, counter/(len(compsList)*(len(compsList)-1)/2)*100)) | |
# save(firstdegrees, 'firstdegrees') | |
# toc() | |
# | |
# | |
# print('{:.2f} % first degree connections.'.format(sum([sum(d) for d in firstdegrees])/(len(compsList)*(len(compsList)-1))*100)) | |
# | |
# | |
# print('Checking full connectedness...') | |
# counter = 0 | |
# degrees = [[1 if firstdegrees[i][j] else 0 for i in range(len(compsList))] for j in range(len(compsList))] | |
# | |
# | |
# def connectionDegree(n1, n2): | |
# degree = 0 | |
# buffer = [n1] | |
# connectionFound = False | |
# while not connectionFound: | |
# nextBuffer = [] | |
# for b in buffer: | |
# if degrees[b][n2] > 0: | |
# return degree + degrees[b][n2] | |
# else: | |
# nextBuffer += connections[b] | |
# degree += 1 | |
# buffer = list(set(nextBuffer)) | |
# if degree > 4: | |
# raise Exception('Degree greater than max allowed: ({}, {})'.format(n1,n2)) | |
# | |
# | |
# try: | |
# degrees = load('degrees') | |
# except FileNotFoundError: | |
# for i in range(len(compsList)-1): | |
# for j in range(i+1, len(compsList)): | |
# thisDegree = connectionDegree(i,j) | |
# degrees[i][j] = thisDegree | |
# degrees[j][i] = thisDegree | |
# counter += 1 | |
# if not counter%50000: | |
# print('{:.0f}/{:.0f} ({:.2f} %)'.format(counter, len(compsList)*(len(compsList)-1)/2, counter/(len(compsList)*(len(compsList)-1)/2)*100)) | |
# toc() | |
# save(degrees, 'degrees') | |
print('Filling distance table...') | |
distances = [[0 if i==j else -1 for i in range(len(compsList))] for j in range(len(compsList))] | |
def getForwardBuffer(level): | |
return [(i,j) for i in range(len(compsList)) for j in range(i,len(compsList)) if distances[i][j] == level] | |
def getBackwardBuffer(): | |
return [(i,j) for i in range(len(compsList)) for j in range(i,len(compsList)) if distances[i][j] == -1] | |
filled = 0 | |
level = 0 | |
total = len(compsList)*len(compsList) | |
while level<10: | |
if filled < total*0.33: | |
buffer = getForwardBuffer(level) | |
print('Level: {:.0f}, buffer length: {:.0f}. Forward search'.format(level, len(buffer))) | |
counter = 0 | |
toc() | |
for i,j in buffer: | |
for k in connections[i]: | |
if distances[j][k] == -1: | |
distances[j][k] = level+1 | |
distances[k][j] = level+1 | |
filled += 1 | |
for k in connections[j]: | |
if distances[i][k] == -1: | |
distances[i][k] = level+1 | |
distances[k][i] = level+1 | |
filled += 1 | |
counter += 1 | |
if not counter%20000: | |
print('filled: {:.0f}/{:.0f}\tlevel: {:.0f}\t({:.2f} % total, {:.2f} % of level)'.format(filled, total, level, filled/total*100, counter/len(buffer)*100)) | |
else: | |
buffer = getBackwardBuffer() | |
print('Level: {:.0f}, buffer length: {:.0f}. Backward search'.format(level, len(buffer))) | |
counter = 0 | |
for i,j in buffer: | |
for k in connections[i]: | |
if distances[j][k] == level and distances[i][j] == -1: | |
distances[i][j] = level+1 | |
distances[j][i] = level+1 | |
filled += 1 | |
for k in connections[j]: | |
if distances[i][k] == level and distances[i][j] == -1: | |
distances[i][j] = level+1 | |
distances[j][i] = level+1 | |
filled += 1 | |
counter += 1 | |
if not counter%20000: | |
print('filled: {:.0f}/{:.0f}\tlevel: {:.0f}\t({:.2f} % total, {:.2f} % of level)'.format(filled, total, level, filled/total*100, counter/len(buffer)*100)) | |
level += 1 | |
toc() | |
degrees = distances | |
save(degrees, 'degrees') | |
for x in range(15): | |
compCount = 0 | |
for i in range(len(degrees)): | |
for j in range(len(degrees[i])): | |
if distances[i][j] == x: | |
compCount += 1 | |
print('By competition: Distance: {:3.0f}, Count: {:8.0f}, ({:6.2f} %)'.format(x, compCount, compCount/(len(compsList)*(len(compsList)-1))*100)) | |
print('Maximum degree of separation: {}'.format(max([max(d) for d in degrees]))) | |
conn.close() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment