seanlinehan · September 13, 2015 05:19
diff --git a/optimize.py b/optimize.py
 #!/usr/bin/env python
 from __future__ import division
 import random, sys, math
 from collections import defaultdict

 # Convenience
 def remove_at_index(l, index):
    return l[:index] + l[index+1:]

 def sort_population(population):
    return sorted(population.iteritems(), key=lambda i: len(i[1]), reverse=True)

 # Here we determine which member of the population is dragging
 # down the total number of shared followers the most.
 def find_worst(population):
    comparison = None
    worst_index = None
    for i in range(len(population)):
        new_users = remove_at_index(population, i)
        new_metric = len(get_intersection(new_users))

        if comparison is None:
            comparison = new_metric
            worst_index = i
        elif comparison > new_metric:
            comparison = new_metric
            worst_index = i

    return worst_index

 # Gets a replacement person from the population at random,
 # taking care not to return a person who is already in the
 # test set.
 def get_replacement(population, existing):
    index = random.randrange(len(population))
    replacement = population[index]
    if replacement[0] in [e[0] for e in existing]:
        return get_replacement(population, existing)
    else:
        return replacement

 # Finds the common followers amongst the population
 def get_intersection(population):
    # pull out the population' following list
    list_of_follower_lists = map(lambda x: x[1], population)
    return set(list_of_follower_lists[0]).intersection(*list_of_follower_lists)

 # This is our annealling method. Constants chosen because
 # I liked the way the decay looked. Not emperical.
 def heat(i, rounds):
    return (rounds * math.pow((1/15),(i/(rounds/2)))/100)

 def optimize(population, rounds):
    population = sort_population(population)
    # Set our seed set to a pretty good default
    test_set = population[:10]
    # To compare other sets against
    highest_exposure = len(get_intersection(test_set))
    print "Baseline: %d" % highest_exposure
    print "=============="

    for i in range(rounds):
        # Find somebody to remove
        index = find_worst(test_set)
        # Find somebody to replace them
        replacement = get_replacement(population, test_set)

        # Here we get our new test set
        # We are using a form of simulated annealing to try
        # to avoid local maximas
        if random.random() < heat(i, rounds):
            new_set = remove_at_index(test_set, random.randrange(len(test_set)))
            new_set.append(replacement)
        else:
            new_set = remove_at_index(test_set, index)
            new_set.append(replacement)

        # Test it out
        exposure = len(get_intersection(new_set))
        if exposure > highest_exposure:
            highest_exposure = exposure
            test_set = new_set

    print "Conclusion: %d" % highest_exposure
    return test_set

 if __name__ == '__main__':
  filtered = ["FILTERED LIST FROM PREVIOUS STEP"]
  # Run the experiment 10 times in a row
  for i in range(10):
    # With 100,000 rounds for each experiment
    best_set = optimize(filtered, 100000)
    for person in best_set:
        print "https://twitter.com/intent/user?user_id=%s" % person[0]
	#!/usr/bin/env python
	from __future__ import division
	import random, sys, math
	from collections import defaultdict

	# Convenience
	def remove_at_index(l, index):
	return l[:index] + l[index+1:]

	def sort_population(population):
	return sorted(population.iteritems(), key=lambda i: len(i[1]), reverse=True)

	# Here we determine which member of the population is dragging
	# down the total number of shared followers the most.
	def find_worst(population):
	comparison = None
	worst_index = None
	for i in range(len(population)):
	new_users = remove_at_index(population, i)
	new_metric = len(get_intersection(new_users))

	if comparison is None:
	comparison = new_metric
	worst_index = i
	elif comparison > new_metric:
	comparison = new_metric
	worst_index = i

	return worst_index

	# Gets a replacement person from the population at random,
	# taking care not to return a person who is already in the
	# test set.
	def get_replacement(population, existing):
	index = random.randrange(len(population))
	replacement = population[index]
	if replacement[0] in [e[0] for e in existing]:
	return get_replacement(population, existing)
	else:
	return replacement

	# Finds the common followers amongst the population
	def get_intersection(population):
	# pull out the population' following list
	list_of_follower_lists = map(lambda x: x[1], population)
	return set(list_of_follower_lists[0]).intersection(*list_of_follower_lists)

	# This is our annealling method. Constants chosen because
	# I liked the way the decay looked. Not emperical.
	def heat(i, rounds):
	return (rounds * math.pow((1/15),(i/(rounds/2)))/100)

	def optimize(population, rounds):
	population = sort_population(population)
	# Set our seed set to a pretty good default
	test_set = population[:10]
	# To compare other sets against
	highest_exposure = len(get_intersection(test_set))
	print "Baseline: %d" % highest_exposure
	print "=============="

	for i in range(rounds):
	# Find somebody to remove
	index = find_worst(test_set)
	# Find somebody to replace them
	replacement = get_replacement(population, test_set)

	# Here we get our new test set
	# We are using a form of simulated annealing to try
	# to avoid local maximas
	if random.random() < heat(i, rounds):
	new_set = remove_at_index(test_set, random.randrange(len(test_set)))
	new_set.append(replacement)
	else:
	new_set = remove_at_index(test_set, index)
	new_set.append(replacement)

	# Test it out
	exposure = len(get_intersection(new_set))
	if exposure > highest_exposure:
	highest_exposure = exposure
	test_set = new_set

	print "Conclusion: %d" % highest_exposure
	return test_set

	if __name__ == '__main__':
	filtered = ["FILTERED LIST FROM PREVIOUS STEP"]
	# Run the experiment 10 times in a row
	for i in range(10):
	# With 100,000 rounds for each experiment
	best_set = optimize(filtered, 100000)
	for person in best_set:
	print "https://twitter.com/intent/user?user_id=%s" % person[0]