Last active
April 24, 2022 18:42
-
-
Save lancejohnson/8a62755e72579b378a736a6defbfc8a2 to your computer and use it in GitHub Desktop.
Model a "Strikes" system for deciding on sample sizes for binary tests with a threshold probability
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# We're using a baseball analogy. E.g. a mailer gets 3 strikes per out and
# 3 outs in the first 9 pitches. All the limits are defined below.

# Inputs ----
# Minimum conversion probability we need to be profitable for the business.
ThresholdProb <- 0.03
num_simulations <- 1000
strikes_per_out <- 3
outs_limit <- 1

# The game starts here! ----
# One "pitch" is the number of mailers expected to produce one conversion at
# the threshold rate, rounded to a whole mailer.
mailers_per_pitch <- round(1 / ThresholdProb, 0)

# Build the simulation length from whole pitches. Using the unrounded
# 1 / ThresholdProb here could give a fractional length, or one shorter than
# outs_limit * strikes_per_out * mailers_per_pitch, which is the row the
# summary statistics read from every simulation.
mailers_per_simulation <- mailers_per_pitch * strikes_per_out * outs_limit

# Here's a list to hold all the simulations (one data.frame per simulation).
AllSimulations <- vector("list", num_simulations)
# Simulate one run of mailers and track the running statistics after each one.
#
# Args:
#   n_mailers:         number of mailers to send in this run.
#   threshold_prob:    probability used to draw each binary response; also the
#                      threshold the Beta posterior is compared against.
#   mailers_per_pitch: number of mailers that make up one pitch.
#   strikes_per_out:   number of strikes that make up one out.
#
# Returns:
#   A data.frame with one row per mailer and the columns
#     conversion            1 = converted, 0 = did not convert
#     sum_conversion        running total of conversions
#     naive_conversion_rate sum_conversion / mailers sent so far
#     alpha.x, beta.x       Beta parameters after this mailer
#     pbeta                 probability the conversion rate is over the
#                           threshold, under Beta(alpha.x, beta.x)
#     std                   standard deviation of Beta(alpha.x, beta.x)
#     pitches               completed pitches so far
#     strikes               pitches minus conversions so far
#     outs                  completed outs so far
simulate_mailers <- function(n_mailers, threshold_prob, mailers_per_pitch,
                             strikes_per_out) {
  conversion <- rbinom(n_mailers, 1, threshold_prob)
  # Index off the draws actually returned so a fractional n_mailers cannot
  # leave the index and the response vector with different lengths.
  mailer <- seq_along(conversion)
  sum_conversion <- cumsum(conversion)

  # Observed successes / failures plus the pseudo-counts threshold_prob + 1
  # and (1 - threshold_prob) + 1.
  alpha.x <- sum_conversion + threshold_prob + 1
  beta.x <- mailer - sum_conversion + (1 - threshold_prob) + 1
  alpha_plus_beta <- alpha.x + beta.x

  pitches <- floor(mailer / mailers_per_pitch)
  # NOTE(review): strikes are described as "pitches with 0 conversions", but
  # this is pitches minus total conversions, so it can go negative and two
  # conversions inside one pitch cancel a strike from another pitch. Confirm
  # this is the intended model before relying on the strikes/outs columns.
  strikes <- pitches - sum_conversion

  data.frame(
    conversion = conversion,
    sum_conversion = sum_conversion,
    naive_conversion_rate = sum_conversion / mailer,
    alpha.x = alpha.x,
    beta.x = beta.x,
    # Upper tail: P(rate > threshold). The default lower tail would give the
    # probability of being at or below the threshold instead.
    pbeta = pbeta(threshold_prob, alpha.x, beta.x, lower.tail = FALSE),
    std = ((alpha.x * beta.x) /
      (alpha_plus_beta^2 * (alpha_plus_beta + 1)))^0.5,
    pitches = pitches,
    strikes = strikes,
    outs = floor(strikes / strikes_per_out)
  )
}

# Run every simulation; each element is the per-mailer data.frame of one run.
AllSimulations <- lapply(
  seq_len(num_simulations),
  function(simulation) {
    simulate_mailers(
      mailers_per_simulation,
      ThresholdProb,
      mailers_per_pitch,
      strikes_per_out
    )
  }
)
# Now that we've got our simulations, we want to know some things about them!
initial_pitch_limit <- outs_limit * strikes_per_out
initial_mailer_limit <- initial_pitch_limit * mailers_per_pitch

# Pull the cell at (row, column) out of every simulation as a numeric vector.
# A row that does not exist in a simulation yields NA for that simulation.
value_at <- function(row, column) {
  vapply(
    AllSimulations,
    function(simulation) as.numeric(simulation[row, column]),
    numeric(1)
  )
}

# What share of simulations have reached the outs limit by the initial mailer
# limit? Stored as a fraction of num_simulations (0 to 1), not as a percent.
cat("Outs Limit ", outs_limit, "\n")
strike_outs <- value_at(initial_mailer_limit, "outs") >= outs_limit
# Count the TRUE values directly: indexing table(strike_outs) with a logical
# TRUE returns every cell of the table, not the "TRUE" cell.
percent_strike_outs <- sum(strike_outs) / num_simulations

# What is the average conversion rate over the initial mailer limit?
total_conversions <- sum(value_at(initial_mailer_limit, "sum_conversion"))
avg_conv_by_mailer_limit <-
  (total_conversions / num_simulations) / initial_mailer_limit

# What's the average std at mailer 1?
initial_std <- sum(value_at(1, "std"))
avg_initial_std <- initial_std / num_simulations

# What's the average std at the initial mailer limit?
# This is a plain mean over simulations; the std is already a per-row value,
# so it must not be divided by the number of mailers as well.
mailer_limit_std <- sum(value_at(initial_mailer_limit, "std"))
avg_std_by_mailer_limit <- mailer_limit_std / num_simulations
# Report the summary statistics computed above.
cat("Averages \n")
cat(
  "Conv by mailer #", initial_mailer_limit,
  "is ", avg_conv_by_mailer_limit, "\n"
)
# End the label with a newline and print explicitly: a bare top-level
# expression is only auto-printed interactively, not when the file is run via
# source() with its default arguments.
cat("% Strike Outs\n")
print(percent_strike_outs)
cat("Average STD at mailer # 1 is ", avg_initial_std, "\n")
cat(
  "STD by mailer #", initial_mailer_limit,
  "is ", avg_std_by_mailer_limit, "\n"
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment