Skip to content

Instantly share code, notes, and snippets.

@lancejohnson
Last active April 24, 2022 18:42
Show Gist options
  • Save lancejohnson/8a62755e72579b378a736a6defbfc8a2 to your computer and use it in GitHub Desktop.
Save lancejohnson/8a62755e72579b378a736a6defbfc8a2 to your computer and use it in GitHub Desktop.
Model a "Strikes" system for deciding on sample sizes for binary tests with a threshold probability
# We're using a baseball analogy. E.g. a mailer gets 3 strikes per out and
# 3 outs in the first 9 pitches. All the limits are defined below.

# Inputs ----
# Minimum conversion probability we need to be profitable for the business.
ThresholdProb <- 0.03
# Number of independent simulated mailer campaigns.
num_simulations <- 1000
strikes_per_out <- 3
outs_limit <- 1

# The game starts here!
# One "pitch" is the number of mailers expected to produce one conversion at
# exactly the threshold rate, rounded to a whole mailer.
mailers_per_pitch <- round(1 / ThresholdProb, 0)
# Build the per-simulation mailer count from the ROUNDED pitch size so that it
# is always a whole number and always equals
# outs_limit * strikes_per_out * mailers_per_pitch, the row the summary
# statistics read. Using the unrounded 1 / ThresholdProb here could produce
# fewer rows than that (e.g. ThresholdProb = 0.06 gives 50 vs. 51).
mailers_per_simulation <- mailers_per_pitch * strikes_per_out * outs_limit
# Run the simulations ----
# AllSimulations holds one data frame per simulation, with one row per mailer
# sent. Columns:
#   conversion            1 if the mailer converted, 0 otherwise
#   sum_conversion        running total of conversions
#   naive_conversion_rate running conversions / mailers sent so far
#   alpha.x, beta.x       Beta parameters: running conversions (resp.
#                         non-conversions) plus ThresholdProb + 1
#                         (resp. (1 - ThresholdProb) + 1)
#                         NOTE(review): the "+ 1" offsets look like a prior
#                         choice -- confirm they are intended.
#   pbeta                 probability, under Beta(alpha.x, beta.x), that the
#                         conversion rate is OVER ThresholdProb
#   std                   standard deviation of Beta(alpha.x, beta.x)
#   pitches               completed pitches (blocks of mailers_per_pitch)
#   strikes               pitches minus running conversions
#   outs                  floor(strikes / strikes_per_out)
AllSimulations <- vector("list", num_simulations)
for (simulation in seq_len(num_simulations)) {
  # One Bernoulli draw per mailer at exactly the threshold conversion rate.
  response <- rbinom(mailers_per_simulation, 1, ThresholdProb)
  mailer_index <- seq_along(response)
  sum_conversion <- cumsum(response)

  alpha_x <- sum_conversion + ThresholdProb + 1
  beta_x <- mailer_index - sum_conversion + (1 - ThresholdProb) + 1
  ab_sum <- alpha_x + beta_x

  pitches <- floor(mailer_index / mailers_per_pitch)
  strikes <- pitches - sum_conversion

  # Build the whole frame at once instead of growing it cell by cell, which
  # copies the data frame on every assignment.
  AllSimulations[[simulation]] <- data.frame(
    conversion = response,
    sum_conversion = sum_conversion,
    naive_conversion_rate = sum_conversion / mailer_index,
    alpha.x = alpha_x,
    beta.x = beta_x,
    # lower.tail = FALSE gives P(rate > ThresholdProb); the default lower
    # tail would be the probability of being at or BELOW the threshold.
    pbeta = pbeta(ThresholdProb, alpha_x, beta_x, lower.tail = FALSE),
    std = sqrt((alpha_x * beta_x) / (ab_sum^2 * (ab_sum + 1))),
    pitches = pitches,
    strikes = strikes,
    outs = floor(strikes / strikes_per_out)
  )
}
# Now that we've got our simulations, we want to know some things about them!
initial_pitch_limit <- outs_limit * strikes_per_out
initial_mailer_limit <- initial_pitch_limit * mailers_per_pitch

# What fraction of simulations strike out within the initial mailer limit?
# At that row, pitches equals outs_limit * strikes_per_out, so reaching the
# outs limit is the same as having ZERO conversions by then.
cat("Outs Limit ", outs_limit, "\n")
strike_outs <- vapply(
  AllSimulations,
  function(sim) sim[initial_mailer_limit, "outs"] >= outs_limit,
  logical(1)
)
# Fraction (0-1) of simulations that struck out. mean() of a logical vector is
# the share of TRUE values; indexing table(...) with a bare TRUE would instead
# return every cell of the table.
percent_strike_outs <- mean(strike_outs)
# Average conversions per mailer sent, measured at the initial mailer limit:
# total conversions across simulations, averaged per simulation, then divided
# by the number of mailers sent at that point.
conversions_at_limit <- vapply(
  AllSimulations,
  function(sim) sim[initial_mailer_limit, "sum_conversion"],
  numeric(1)
)
total_conversions <- sum(conversions_at_limit)
avg_conv_by_mailer_limit <-
  (total_conversions / num_simulations) / initial_mailer_limit
# Mean of the Beta standard deviation after the very first mailer, taken
# across all simulations.
first_mailer_std <- vapply(
  AllSimulations,
  function(sim) sim[1, "std"],
  numeric(1)
)
# Left-to-right fold starting at 0, i.e. the same running sum a loop would do.
initial_std <- Reduce(`+`, first_mailer_std, 0)
avg_initial_std <- initial_std / num_simulations
# What's the average std at the initial mailer limit?
std_at_limit <- vapply(
  AllSimulations,
  function(sim) sim[initial_mailer_limit, "std"],
  numeric(1)
)
mailer_limit_std <- sum(std_at_limit)
# Average across simulations only, mirroring avg_initial_std. The std is
# already a per-row quantity, so it must not also be divided by the number of
# mailers.
avg_std_by_mailer_limit <- mailer_limit_std / num_simulations
# Report ----
# Print the summary statistics computed above.
cat("Averages \n")
cat("Conv by mailer #", initial_mailer_limit, "is ", avg_conv_by_mailer_limit, "\n")
cat("% Strike Outs", "\n")
# Explicit print(): a bare top-level expression is not shown when the script
# is run through source() with its default arguments.
print(percent_strike_outs)
cat("Average STD at mailer # 1 is ", avg_initial_std, "\n")
cat("STD by mailer #", initial_mailer_limit, "is ", avg_std_by_mailer_limit, "\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment