Last active
May 25, 2016 19:34
-
-
Save henrik/ceb1331bd7aba42ed7deaf906d208bc7 to your computer and use it in GitHub Desktop.
A/B test simulations for my own learning.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Simulating running an A/B test several times to see false negatives vs. statistical power.
# https://github.com/bmuller/abanalyzer
# gem install abanalyzer
require "abanalyzer"

# Each treatment simulates a single visitor: it picks one element uniformly
# at random, so the share of "converted" entries is the conversion rate.
# A converts at 50%. B converts at 66.66…%.
treatment_a = -> { [ "converted", "unconverted" ].sample }
treatment_b = -> { [ "converted", "converted", "unconverted" ].sample }

# Parameters as entered into http://www.evanmiller.org/ab-testing/sample-size.html
# Baseline conversion rate: 50%
# Minimum detectable effect: 16.66% absolute
statistical_power = 80 # %
significance_level = 0.05
sample_size = 139 # per variation

# Run 100 tests to see how many give the wrong result.
100.times do
  a_conversions = 0
  b_conversions = 0

  # One simulated visitor per variation per iteration.
  sample_size.times do
    a_conversions += 1 if treatment_a.call == "converted"
    b_conversions += 1 if treatment_b.call == "converted"
  end

  # Fancy "G-test".
  tester = ABAnalyzer::ABTest.new(
    a: { converted: a_conversions, unconverted: sample_size - a_conversions },
    b: { converted: b_conversions, unconverted: sample_size - b_conversions },
  )

  # B really is better than A here, so a run that reports "no difference"
  # is a false negative.
  unless tester.different?(significance_level)
    puts "False negative! Detected no diff where there is one. Should happen ~#{100 - statistical_power}/100 times."
  end
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Simulating running an A/B test several times to see false positives vs. significance level.
# https://github.com/bmuller/abanalyzer
# gem install abanalyzer
require "abanalyzer"

# Each treatment simulates a single visitor: it picks one element uniformly
# at random, so the share of "converted" entries is the conversion rate.
# A and B both convert at 50%.
treatment_a = -> { [ "converted", "unconverted" ].sample }
treatment_b = -> { [ "converted", "unconverted" ].sample }

# Parameters as entered into http://www.evanmiller.org/ab-testing/sample-size.html
# Baseline conversion rate: 50%
# Minimum detectable effect: 5%
significance_level = 0.05
sample_size = 1567

# Run 100 tests to see how many give the wrong result.
100.times do
  a_conversions = 0
  b_conversions = 0

  # One simulated visitor per variation per iteration.
  sample_size.times do
    a_conversions += 1 if treatment_a.call == "converted"
    b_conversions += 1 if treatment_b.call == "converted"
  end

  # Fancy "G-test".
  tester = ABAnalyzer::ABTest.new(
    a: { converted: a_conversions, unconverted: sample_size - a_conversions },
    b: { converted: b_conversions, unconverted: sample_size - b_conversions },
  )

  # A and B are identical here, so a run that reports a difference
  # is a false positive.
  if tester.different?(significance_level)
    puts "False positive! Detected a diff where there is none. Should happen ~#{significance_level * 100}/100 times."
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment