Last active
August 29, 2015 14:22
-
-
Save Zolmeister/dcdf1a85e491e8aaf8d5 to your computer and use it in GitHub Desktop.
A/B test calculations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################################################ | |
# A/B test calculations # | |
# # | |
# Based on Stats Engine # | |
# //pages.optimizely.com/rs/optimizely/images/stats_engine_technical_paper.pdf # | |
################################################################################ | |
################## | |
# Helper Methods # | |
################## | |
Z_MAX = 6 # Maxium +/- z value | |
# | |
# probability of normal z value | |
# Adapted from a polynomial approximation in: | |
# Ibbetson D, Algorithm 209 Collected Algorithms of the CACM 1963 p. 616 | |
# Note: This routine has six digit accuracy, so it is only useful for absolute | |
# z values <= 6. For z values > to 6.0, poz() returns 0.0 | |
# | |
# source: https://www.fourmilab.ch/rpkp/experiments/analysis/zCalc.html | |
# | |
poz = (z) -> | |
y = undefined | |
x = undefined | |
w = undefined | |
if z is 0.0 | |
x = 0.0 | |
else | |
y = 0.5 * Math.abs(z) | |
if y > Z_MAX * 0.5 | |
x = 1.0 | |
else if y < 1.0 | |
w = y * y | |
x = ((((((((0.000124818987 * w \ | |
- 0.001075204047) * w + 0.005198775019) * w \ | |
- 0.019198292004) * w + 0.059054035642) * w \ | |
- 0.151968751364) * w + 0.319152932694) * w \ | |
- 0.531923007300) * w + 0.797884560593) * y * 2.0 | |
else | |
y -= 2.0 | |
x = (((((((((((((-0.000045255659 * y \ | |
+ 0.000152529290) * y - 0.000019538132) * y \ | |
- 0.000676904986) * y + 0.001390604284) * y \ | |
- 0.000794620820) * y - 0.002034254874) * y \ | |
+ 0.006549791214) * y - 0.010557625006) * y \ | |
+ 0.011630447319) * y - 0.009279453341) * y \ | |
+ 0.005353579108) * y - 0.002141268741) * y \ | |
+ 0.000535310849) * y + 0.999936657524 | |
if z > 0.0 | |
return (x + 1.0) * 0.5 | |
else | |
return (1.0 - x) * 0.5 | |
# | |
# Compute critical normal z value to produce given p. | |
# We just do a bisection search for a value within CHI_EPSILON, | |
# relying on the monotonicity of pochisq(). | |
# | |
# source: https://www.fourmilab.ch/rpkp/experiments/analysis/zCalc.html | |
# | |
critz = (p) -> | |
Z_EPSILON = 0.000001 # Accuracy of z approximation | |
minz = -Z_MAX | |
maxz = Z_MAX | |
zval = 0.0 | |
pval = undefined | |
if p < 0.0 or p > 1.0 | |
return -1 | |
while maxz - minz > Z_EPSILON | |
pval = poz(zval) | |
if pval > p | |
maxz = zval | |
else | |
minz = zval | |
zval = (maxz + minz) * 0.5 | |
zval | |
proportionTrueNull = (results) -> | |
trueNull = _.sum results, ({isSignificant, xBar, yBar}) -> | |
if xBar > yBar and isSignificant | |
return 1 | |
else | |
return 0 | |
trueNull / results.length | |
############# | |
# Constants # | |
############# | |
tau = 0.5 # ???????? - free variable in significance | |
FDRthreshold = 0.10 | |
errorRate = 0.05 | |
data = | |
kpi1: | |
control: | |
rate: 0.2 | |
visitors: 10000 | |
var1: | |
rate: 0.22 | |
visitors: 10000 | |
kpi2: | |
control: | |
rate: 0.01 | |
visitors: 10000 | |
var1: | |
rate: 0.02 | |
visitors: 10000 | |
N = Object.keys(data.kpi1).length # number of a/b tests | |
####################### | |
# Conclusive Results # | |
# See (2) in paper # | |
####################### | |
conclusiveResults _.map data (data, kpi) -> | |
# You can imagine looking at more variables here too | |
xBar = kpi.control.rate | |
yBar = kpi.var1.rate | |
n = kpi.control.visitors + kpi.var1.visitors | |
thetaHat = yBar - xBar | |
alpha = errorRate | |
V = 2 * (xBar * (1 - xBar) + yBar * (1 - yBar)) / n | |
pHat = Math.sqrt( | |
(2 * Math.log(1 / alpha) - Math.log(V / (V + tau))) * | |
(V * (V + tau) / tau) | |
) | |
isSignificant = Math.abs(thetaHat) > pHat | |
{ | |
isSignificant | |
pHat | |
kpi | |
xBar | |
yBar | |
} | |
###################### | |
# Actionable Results # | |
# See (4) in paper # | |
###################### | |
FDRresults = _.map _.sortBy(conclusiveResults, 'pHat'), (result, index) -> | |
{pHat} = result | |
i = index + 1 | |
piZero = proportionTrueNull(conclusiveResults) | |
FDR = piZero * pHat / (i * N) | |
_.default {FDR}, result | |
# We can take action on these results! | |
significantResults = _.filter FDRresults, ({FDR}) -> | |
(1 - FDR) > FDRthreshold | |
######################## | |
# Confidence Intervals # | |
######################## | |
confidenceIntervals = _.map FDRresults, (result) -> | |
{xBar, yBar, isSignificant} = result | |
# Calculate std. deviation | |
mu = (xBar + yBar) / 2 | |
delta = Math.sqrt( | |
1 / 2 * | |
_.sum [xBar, yBar], (xi) -> | |
Math.pow(xi - mu, 2) | |
) | |
# See section 3.5 in paper | |
q = errorRate | |
m = significantResults.length | |
coverageLevel = if isSignificant | |
then (1 - q * m / N) | |
else (1 - q * (m + 1) / N) | |
z = critz(coverageLevel) | |
{ | |
low: yBar - z * delta / Math.sqrt(N) | |
high: yBar + z * delta / Math.sqrt(N) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
////////////////////////////////////////////////////////////////////////////////// | |
// A/B test calculations // | |
// // | |
// Based on Stats Engine // | |
// http://pages.optimizely.com/rs/optimizely/images/stats_engine_technical_paper.pdf // | |
////////////////////////////////////////////////////////////////////////////////// | |
var FDRresults, FDRthreshold, N, Z_MAX, confidenceIntervals, critz, data, errorRate, poz, proportionTrueNull, significantResults, tau; | |
Z_MAX = 6; | |
proportionTrueNull = function(results) { | |
var trueNull; | |
trueNull = _.sum(results, function(result) { | |
var isSignificant, xBar, yBar; | |
isSignificant = result.isSignificant, | |
xBar = result.xBar, | |
yBar = result.yBar; | |
if (xBar > yBar && isSignificant) { | |
return 1; | |
} else { | |
return 0; | |
} | |
}); | |
return trueNull / results.length; | |
}; | |
/////////////// | |
// Constants // | |
/////////////// | |
tau = 0.5; // ???????? - free variable in significance | |
FDRthreshold = 0.10; | |
errorRate = 0.05; | |
data = { | |
kpi1: { | |
control: { | |
rate: 0.2, | |
visitors: 10000 | |
}, | |
var1: { | |
rate: 0.22, | |
visitors: 10000 | |
} | |
}, | |
kpi2: { | |
control: { | |
rate: 0.01, | |
visitors: 10000 | |
}, | |
var1: { | |
rate: 0.02, | |
visitors: 10000 | |
} | |
} | |
}; | |
N = Object.keys(data.kpi1).length; // number of a/b tests | |
///////////////////////// | |
// Conclusive Results // | |
// See (2) in paper // | |
///////////////////////// | |
conclusiveResults = _.map(data(function(data, kpi) { | |
var V, alpha, isSignificant, n, pHat, thetaHat, xBar, yBar; | |
// You can imagine looking at more variables here too | |
xBar = kpi.control.rate; | |
yBar = kpi.var1.rate; | |
n = kpi.control.visitors + kpi.var1.visitors; | |
thetaHat = yBar - xBar; | |
alpha = errorRate; | |
V = 2 * (xBar * (1 - xBar) + yBar * (1 - yBar)) / n; | |
pHat = Math.sqrt( | |
(2 * Math.log(1 / alpha) - Math.log(V / (V + tau))) * | |
(V * (V + tau) / tau) | |
); | |
isSignificant = Math.abs(thetaHat) > pHat; | |
return { | |
isSignificant: isSignificant, | |
pHat: pHat, | |
kpi: kpi, | |
xBar: xBar, | |
yBar: yBar | |
}; | |
})); | |
//////////////////////// | |
// Actionable Results // | |
// See (4) in paper // | |
//////////////////////// | |
FDRresults = _.map(_.sortBy(conclusiveResults, 'pHat'), function(result, index) { | |
var FDR, i, pHat, piZero; | |
pHat = result.pHat; | |
i = index + 1; | |
piZero = proportionTrueNull(conclusiveResults); | |
FDR = piZero * pHat / (i * N); | |
return _.defaults({ | |
FDR: FDR | |
}, result); | |
}); | |
// We can take action on these results! | |
significantResults = _.filter(FDRresults, function(result) { | |
return (1 - result.FDR) > FDRthreshold; | |
}); | |
////////////////////////// | |
// Confidence Intervals // | |
////////////////////////// | |
confidenceIntervals = _.map(FDRresults, function(result) { | |
var coverageLevel, delta, isSignificant, m, mu, q, xBar, yBar, z; | |
xBar = result.xBar, | |
yBar = result.yBar, | |
isSignificant = result.isSignificant; | |
// Calculate std. deviation | |
mu = (xBar + yBar) / 2; | |
delta = Math.sqrt(1 / 2 * _.sum([xBar, yBar], function(xi) { | |
return Math.pow(xi - mu, 2); | |
})); | |
// See section 3.5 in paper | |
q = errorRate; | |
m = significantResults.length; | |
coverageLevel = isSignificant ? 1 - q * m / N : 1 - q * (m + 1) / N; | |
z = critz(coverageLevel); // convert coverageLevel to z-score | |
return { | |
low: yBar - z * delta / Math.sqrt(N), | |
high: yBar + z * delta / Math.sqrt(N) | |
}; | |
}); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment