Created
September 1, 2015 05:15
-
-
Save simonrad/3b227fbde34b66b0d104 to your computer and use it in GitHub Desktop.
Statistical Significance
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from math import sqrt | |
| from scipy.stats import norm | |
def normal_dist_params(num_trials, num_successes):
    """Return the normal approximation of an observed success rate.

    The observed rate ``p = num_successes / num_trials`` is modelled as a
    normal distribution with mean ``p`` and variance ``p * (1 - p) / n``.

    Args:
        num_trials: Number of trials in the experiment group; must be > 0.
        num_successes: Number of successful trials; must lie in
            ``[0, num_trials]``.

    Returns:
        A ``(mean, variance)`` tuple of floats.

    Raises:
        ValueError: If ``num_trials`` is not positive or ``num_successes``
            is outside ``[0, num_trials]``.
    """
    # Reject inputs that would otherwise divide by zero or silently yield a
    # negative variance (which only fails later, inside sqrt()).
    if num_trials <= 0:
        raise ValueError("num_trials must be positive, got %r" % (num_trials,))
    if not 0 <= num_successes <= num_trials:
        raise ValueError(
            "num_successes must be between 0 and num_trials, got %r"
            % (num_successes,)
        )

    # float() keeps this a true division under Python 2 as well.
    p = float(num_successes) / float(num_trials)
    n = num_trials
    mean = p
    variance = p * (1 - p) / n
    return (mean, variance)
def subtract_normal_dists(dist1_params, dist2_params):
    """Return the parameters of the difference of two normal distributions.

    Args:
        dist1_params: ``(mean, variance)`` of the first distribution.
        dist2_params: ``(mean, variance)`` of the second distribution.

    Returns:
        A ``(mean, variance)`` tuple for ``dist1 - dist2``: the means are
        subtracted and the variances are added.
    """
    (mean1, variance1) = dist1_params
    (mean2, variance2) = dist2_params
    return (mean1 - mean2, variance1 + variance2)
def ab_test_significance(control_trials, control_successes, experiment_trials, experiment_successes):
    """Return how confident we are that the experiment beat the control.

    Each group's success rate is approximated by a normal distribution via
    ``normal_dist_params``; the two are combined with
    ``subtract_normal_dists`` into the distribution of
    ``experiment - control``.

    Args:
        control_trials: Number of trials in the control group.
        control_successes: Number of successes in the control group.
        experiment_trials: Number of trials in the experiment group.
        experiment_successes: Number of successes in the experiment group.

    Returns:
        A ``(confidence, z_score)`` tuple. ``confidence`` is the probability
        mass of the difference distribution above zero; ``z_score`` is the
        mean difference divided by its standard deviation.

    Raises:
        ZeroDivisionError: If the combined variance is zero, i.e. both
            groups have a success rate of exactly 0 or 1.
    """
    control_dist = normal_dist_params(control_trials, control_successes)
    experiment_dist = normal_dist_params(experiment_trials, experiment_successes)
    diff_dist = subtract_normal_dists(experiment_dist, control_dist)
    (diff_dist_mean, diff_dist_variance) = diff_dist
    diff_dist_std_dev = sqrt(diff_dist_variance)
    # sf(x) is 1 - cdf(x) computed directly, so it keeps precision in the
    # tail where cdf(x) is close to 1 and the subtraction would cancel.
    confidence = norm.sf(0, diff_dist_mean, diff_dist_std_dev)
    z_score = diff_dist_mean / diff_dist_std_dev
    return confidence, z_score
# Observed trial and success counts for the baseline and each variation.
baseline_trials = 595
baseline_successes = 32
variation1_trials = 599
variation1_successes = 30
variation2_trials = 622
variation2_successes = 18
variation3_trials = 606
variation3_successes = 51
variation4_trials = 578
variation4_successes = 38

# (label, trials, successes) for every variation, in reporting order.
_variations = [
    ("Variation 1:", variation1_trials, variation1_successes),
    ("Variation 2:", variation2_trials, variation2_successes),
    ("Variation 3:", variation3_trials, variation3_successes),
    ("Variation 4:", variation4_trials, variation4_successes),
]
# The per-group reports list the baseline first, then the variations.
_groups = [("Baseline: ", baseline_trials, baseline_successes)] + _variations

# Each print() call receives one pre-formatted string, so the statement
# parses and prints the same "label value" text on Python 2 and Python 3.
print("Mean of each experiment group:")
for label, trials, successes in _groups:
    print("{0} {1}".format(label, normal_dist_params(trials, successes)[0]))

print("Std-dev of each experiment group:")
for label, trials, successes in _groups:
    print("{0} {1}".format(label, sqrt(normal_dist_params(trials, successes)[1])))

print("Confidence levels and z-scores that the variation was better than the baseline:")
for label, trials, successes in _variations:
    print("{0} {1}".format(
        label,
        ab_test_significance(baseline_trials, baseline_successes, trials, successes),
    ))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment