Created
September 13, 2015 05:19
-
-
Save seanlinehan/ffca4e196de8ff7679ee to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from __future__ import division | |
import random, sys, math | |
from collections import defaultdict | |
# Convenience | |
def remove_at_index(l, index): | |
return l[:index] + l[index+1:] | |
def sort_population(population): | |
return sorted(population.iteritems(), key=lambda i: len(i[1]), reverse=True) | |
# Here we determine which member of the population is dragging | |
# down the total number of shared followers the most. | |
def find_worst(population): | |
comparison = None | |
worst_index = None | |
for i in range(len(population)): | |
new_users = remove_at_index(population, i) | |
new_metric = len(get_intersection(new_users)) | |
if comparison is None: | |
comparison = new_metric | |
worst_index = i | |
elif comparison > new_metric: | |
comparison = new_metric | |
worst_index = i | |
return worst_index | |
# Gets a replacement person from the population at random, | |
# taking care not to return a person who is already in the | |
# test set. | |
def get_replacement(population, existing): | |
index = random.randrange(len(population)) | |
replacement = population[index] | |
if replacement[0] in [e[0] for e in existing]: | |
return get_replacement(population, existing) | |
else: | |
return replacement | |
# Finds the common followers amongst the population | |
def get_intersection(population): | |
# pull out the population' following list | |
list_of_follower_lists = map(lambda x: x[1], population) | |
return set(list_of_follower_lists[0]).intersection(*list_of_follower_lists) | |
# This is our annealling method. Constants chosen because | |
# I liked the way the decay looked. Not emperical. | |
def heat(i, rounds): | |
return (rounds * math.pow((1/15),(i/(rounds/2)))/100) | |
def optimize(population, rounds): | |
population = sort_population(population) | |
# Set our seed set to a pretty good default | |
test_set = population[:10] | |
# To compare other sets against | |
highest_exposure = len(get_intersection(test_set)) | |
print "Baseline: %d" % highest_exposure | |
print "==============" | |
for i in range(rounds): | |
# Find somebody to remove | |
index = find_worst(test_set) | |
# Find somebody to replace them | |
replacement = get_replacement(population, test_set) | |
# Here we get our new test set | |
# We are using a form of simulated annealing to try | |
# to avoid local maximas | |
if random.random() < heat(i, rounds): | |
new_set = remove_at_index(test_set, random.randrange(len(test_set))) | |
new_set.append(replacement) | |
else: | |
new_set = remove_at_index(test_set, index) | |
new_set.append(replacement) | |
# Test it out | |
exposure = len(get_intersection(new_set)) | |
if exposure > highest_exposure: | |
highest_exposure = exposure | |
test_set = new_set | |
print "Conclusion: %d" % highest_exposure | |
return test_set | |
if __name__ == '__main__': | |
filtered = ["FILTERED LIST FROM PREVIOUS STEP"] | |
# Run the experiment 10 times in a row | |
for i in range(10): | |
# With 100,000 rounds for each experiment | |
best_set = optimize(filtered, 100000) | |
for person in best_set: | |
print "https://twitter.com/intent/user?user_id=%s" % person[0] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment