Use to test concurrent A/B tests.
""" | |
Run a simulation of multiple concurrent A/B tests acting on a number of users. | |
Reports on the expected vs. the actual results, and raises a warning if the actual results would lead to the wrong conclusion. | |
Values of interest: | |
-- number_of_users : active users who may be part of an A/B test | |
-- number_of_concurrent_experiments: how many random concurrent experiments to generate | |
-- values in construct_random_experiments(): define a range and distribution for utility and B cohort percentage for each experiment | |
""" | |
import random

__author__ = 'alan fineberg [email protected]'

# tweak these!
number_of_users = 100000
number_of_concurrent_experiments = 50

experiment_b_group_size = dict()
experiment_b_group_utility = dict()
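
# Not in the original gist: when comparing the effect of tweaking the values
# above, seeding the RNG makes runs reproducible, e.g.:
#   random.seed(42)  # any fixed seed; drop this line for fresh randomness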
def construct_random_experiments(num):
    """Creates `num` experiments with random B cohort sizes and utilities."""
    experiment_id = 1
    for _ in range(num):
        # multiply by 100 for the percent
        experiment_b_group_size[experiment_id] = random.uniform(.001, .05)
        # this utility could indicate RPU or some other high-level metric;
        # a uniform distribution is not necessarily the best choice
        experiment_b_group_utility[experiment_id] = random.randint(-100, 100)
        # each ID is a distinct power of two, so IDs can be OR'd into a bitmask
        experiment_id *= 2
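
# Illustration (mine, not in the original gist): because IDs double each time,
# every experiment occupies its own bit and a user's cohort is a bitmask.
# With three experiments the IDs are 1 (0b001), 2 (0b010), and 4 (0b100);
# a cohort of 0b101 means the user is in the B groups of experiments 1 and 4,
# and `cohort & experiment` tests membership in one experiment's B group.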
""" Generates a random cohort for all A/B tests based on the weights of each A/B test""" | |
def random_cohort(): | |
result = 0b0 | |
for experiment, liklihood in experiment_b_group_size.iteritems(): | |
if random.random() < liklihood: | |
result |= experiment | |
return result | |
""" Reports the total utility gleaned from a user in a single cohort """ | |
def utility_from_experiment(cohort): | |
total_utility = 0 | |
for experiment, utility in experiment_b_group_utility.iteritems(): | |
if cohort & experiment: | |
total_utility += utility | |
return total_utility | |
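
# Worked example (mine, not in the original gist): if
# experiment_b_group_utility == {1: 10, 2: -5} and a user's cohort is 0b11,
# then utility_from_experiment(0b11) returns 10 + (-5) == 5. This overlap is
# exactly why concurrent experiments can skew each other's measured utility.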
""" Reports on whether or not a single A/B test was a success. """ | |
def analyze_single_experiment(user_to_cohort, experiment): | |
A_total_utility = 0 | |
B_total_utility = 0 | |
users_in_A_cohort = 0 | |
users_in_B_cohort = 0 | |
for _, cohort in user_to_cohort.iteritems(): | |
if experiment & cohort: | |
B_total_utility += utility_from_experiment(cohort) | |
users_in_B_cohort += 1 | |
else: | |
A_total_utility += utility_from_experiment(cohort) | |
users_in_A_cohort += 1 | |
    A_avg = A_total_utility / users_in_A_cohort
    if users_in_B_cohort:
        B_avg = B_total_utility / users_in_B_cohort
    else:
        B_avg = 0
    expected_utility = experiment_b_group_utility[experiment]
    error = abs(B_avg - expected_utility)
    try:
        error_percent = abs(int(error / expected_utility * 100))
    except ZeroDivisionError:
        error_percent = 'undefined'
    print('\n%s: %s users in B cohort.\n\t Utility: \n\t\tA cohort %s, \n\t\tB cohort %s' % (experiment, users_in_B_cohort, A_avg, B_avg))
    print('\t\texpected: %s \n\t\tobserved: %s \n\t\terror: %s%%' % (expected_utility, int(B_avg), error_percent))
    if B_avg > A_avg:
        print('\tconclusion: apply experiment %s' % experiment)
    else:
        print('\tconclusion: don\'t apply experiment %s' % experiment)
    # an observed sign that disagrees with the true utility would lead to the
    # wrong ship/no-ship decision
    if (B_avg >= 0 and expected_utility < 0) or (B_avg < 0 and expected_utility >= 0):
        print('>>> ALERT! ALERT! BAD ADVICE GIVEN. BAD! <<<')
if __name__ == '__main__':
    print('running experiment for %s users' % number_of_users)
    construct_random_experiments(number_of_concurrent_experiments)
    user_to_cohort = {}
    for i in range(number_of_users):
        user_to_cohort[i] = random_cohort()
    for experiment in experiment_b_group_utility:
        analyze_single_experiment(user_to_cohort, experiment)
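
# Running the script (assuming it is saved as, say, ab_simulation.py):
#   $ python ab_simulation.py
# Output is random each run; rerun a few times to see how often the
# wrong-conclusion alert fires.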