Use to test concurrent A/B tests.
""" | |
Run a simulation of multiple concurrent A/B tests acting on a number of users. | |
Reports on the expected vs. the actual results, and raises a warning if the actual results would lead to the wrong conclusion. | |
Values of interest: | |
-- number_of_users : active users who may be part of an A/B test | |
-- number_of_concurrent_experiments: how many random concurrent experiments to generate | |
-- values in construct_random_experiments(): define a range and distribution for utility and B cohort percentage for each experiment | |
""" | |
import random

__author__ = 'alan fineberg [email protected]'

# tweak these!
number_of_users = 100000
number_of_concurrent_experiments = 50

experiment_b_group_size = dict()
experiment_b_group_utility = dict()
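
# Not in the original gist: when comparing the effect of tweaking the values
# above, seeding the RNG makes runs reproducible, e.g.:
#   random.seed(42)  # any fixed seed; drop this line for fresh randomness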
def construct_random_experiments(num):
    """Creates `num` experiments with random B cohort sizes and utilities."""
    experiment_id = 1
    for _ in range(num):
        # multiply by 100 for the percent
        experiment_b_group_size[experiment_id] = random.uniform(.001, .05)
        # this utility could indicate RPU or some other high-level metric;
        # a uniform distribution is not necessarily the best choice
        experiment_b_group_utility[experiment_id] = random.randint(-100, 100)
        # each ID is a distinct power of two, so IDs can be OR'd into a bitmask
        experiment_id *= 2
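
# Illustration (mine, not in the original gist): because IDs double each time,
# every experiment occupies its own bit and a user's cohort is a bitmask.
# With three experiments the IDs are 1 (0b001), 2 (0b010), and 4 (0b100);
# a cohort of 0b101 means the user is in the B groups of experiments 1 and 4,
# and `cohort & experiment` tests membership in one experiment's B group.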
""" Generates a random cohort for all A/B tests based on the weights of each A/B test""" | |
def random_cohort(): | |
result = 0b0 | |
for experiment, liklihood in experiment_b_group_size.iteritems(): | |
if random.random() < liklihood: | |
result |= experiment | |
return result | |
""" Reports the total utility gleaned from a user in a single cohort """ | |
def utility_from_experiment(cohort): | |
total_utility = 0 | |
for experiment, utility in experiment_b_group_utility.iteritems(): | |
if cohort & experiment: | |
total_utility += utility | |
return total_utility | |
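
# Worked example (mine, not in the original gist): if
# experiment_b_group_utility == {1: 10, 2: -5} and a user's cohort is 0b11,
# then utility_from_experiment(0b11) returns 10 + (-5) == 5. This overlap is
# exactly why concurrent experiments can skew each other's measured utility.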
""" Reports on whether or not a single A/B test was a success. """ | |
def analyze_single_experiment(user_to_cohort, experiment): | |
A_total_utility = 0 | |
B_total_utility = 0 | |
users_in_A_cohort = 0 | |
users_in_B_cohort = 0 | |
for _, cohort in user_to_cohort.iteritems(): | |
if experiment & cohort: | |
B_total_utility += utility_from_experiment(cohort) | |
users_in_B_cohort += 1 | |
else: | |
A_total_utility += utility_from_experiment(cohort) | |
users_in_A_cohort += 1 | |
    A_avg = A_total_utility / users_in_A_cohort
    if users_in_B_cohort:
        B_avg = B_total_utility / users_in_B_cohort
    else:
        B_avg = 0
    expected_utility = experiment_b_group_utility[experiment]
    error = abs(B_avg - expected_utility)
    try:
        error_percent = abs(int(error / expected_utility * 100))
    except ZeroDivisionError:
        error_percent = 'undefined'
    print('\n%s: %s users in B cohort.\n\t Utility: \n\t\tA cohort %s, \n\t\tB cohort %s' % (experiment, users_in_B_cohort, A_avg, B_avg))
    print('\t\texpected: %s \n\t\tobserved: %s \n\t\terror: %s%%' % (expected_utility, int(B_avg), error_percent))
    if B_avg > A_avg:
        print('\tconclusion: apply experiment %s' % experiment)
    else:
        print('\tconclusion: don\'t apply experiment %s' % experiment)
    # an observed sign that disagrees with the true utility would lead to the
    # wrong ship/no-ship decision
    if (B_avg >= 0 and expected_utility < 0) or (B_avg < 0 and expected_utility >= 0):
        print('>>> ALERT! ALERT! BAD ADVICE GIVEN. BAD! <<<')
if __name__ == '__main__':
    print('running experiment for %s users' % number_of_users)
    construct_random_experiments(number_of_concurrent_experiments)
    user_to_cohort = {}
    for i in range(number_of_users):
        user_to_cohort[i] = random_cohort()
    for experiment in experiment_b_group_utility:
        analyze_single_experiment(user_to_cohort, experiment)
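
# Running the script (assuming it is saved as, say, ab_simulation.py):
#   $ python ab_simulation.py
# Output is random each run; rerun a few times to see how often the
# wrong-conclusion alert fires.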