Skip to content

Instantly share code, notes, and snippets.

@seanlinehan
Created September 13, 2015 05:19
Show Gist options
  • Save seanlinehan/ffca4e196de8ff7679ee to your computer and use it in GitHub Desktop.
Save seanlinehan/ffca4e196de8ff7679ee to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
from __future__ import division
import random, sys, math
from collections import defaultdict
# Convenience
def remove_at_index(l, index):
return l[:index] + l[index+1:]
def sort_population(population):
return sorted(population.iteritems(), key=lambda i: len(i[1]), reverse=True)
# Here we determine which member of the population is dragging
# down the total number of shared followers the most.
def find_worst(population):
comparison = None
worst_index = None
for i in range(len(population)):
new_users = remove_at_index(population, i)
new_metric = len(get_intersection(new_users))
if comparison is None:
comparison = new_metric
worst_index = i
elif comparison > new_metric:
comparison = new_metric
worst_index = i
return worst_index
# Gets a replacement person from the population at random,
# taking care not to return a person who is already in the
# test set.
def get_replacement(population, existing):
index = random.randrange(len(population))
replacement = population[index]
if replacement[0] in [e[0] for e in existing]:
return get_replacement(population, existing)
else:
return replacement
# Finds the common followers amongst the population
def get_intersection(population):
# pull out the population' following list
list_of_follower_lists = map(lambda x: x[1], population)
return set(list_of_follower_lists[0]).intersection(*list_of_follower_lists)
# This is our annealling method. Constants chosen because
# I liked the way the decay looked. Not emperical.
def heat(i, rounds):
return (rounds * math.pow((1/15),(i/(rounds/2)))/100)
def optimize(population, rounds):
population = sort_population(population)
# Set our seed set to a pretty good default
test_set = population[:10]
# To compare other sets against
highest_exposure = len(get_intersection(test_set))
print "Baseline: %d" % highest_exposure
print "=============="
for i in range(rounds):
# Find somebody to remove
index = find_worst(test_set)
# Find somebody to replace them
replacement = get_replacement(population, test_set)
# Here we get our new test set
# We are using a form of simulated annealing to try
# to avoid local maximas
if random.random() < heat(i, rounds):
new_set = remove_at_index(test_set, random.randrange(len(test_set)))
new_set.append(replacement)
else:
new_set = remove_at_index(test_set, index)
new_set.append(replacement)
# Test it out
exposure = len(get_intersection(new_set))
if exposure > highest_exposure:
highest_exposure = exposure
test_set = new_set
print "Conclusion: %d" % highest_exposure
return test_set
if __name__ == '__main__':
filtered = ["FILTERED LIST FROM PREVIOUS STEP"]
# Run the experiment 10 times in a row
for i in range(10):
# With 100,000 rounds for each experiment
best_set = optimize(filtered, 100000)
for person in best_set:
print "https://twitter.com/intent/user?user_id=%s" % person[0]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment