Skip to content

Instantly share code, notes, and snippets.

@devoxel
Created May 16, 2025 17:57
Show Gist options
  • Save devoxel/57f43a002d27305c2a655fba1454769d to your computer and use it in GitHub Desktop.
Save devoxel/57f43a002d27305c2a655fba1454769d to your computer and use it in GitHub Desktop.
Monte Carlo to estimate bias in Sloth SLO calculation optimization

5% bias calculated with the spiky load

$ python3 estimate_error.py
Starting Monte Carlo simulation with 1000 runs...
Each run simulates 30 days, divided into 8640 intervals of 5 minutes.
A events = 86136747, B events = 34253712
true = 0.92927688141684, agg = 0.9804034561428873, total_success = 1094375, total_fail = 1177663
Completed 100/1000 runs.
A events = 172610585, B events = 68464739
true = 0.9272375662645488, agg = 0.9789669518230053, total_success = 1135879, total_fail = 1225014
Completed 200/1000 runs.
A events = 259091185, B events = 102676862
true = 0.9262521386212486, agg = 0.9783377615656546, total_success = 1152065, total_fail = 1243792
Completed 300/1000 runs.
A events = 345454293, B events = 136896807
true = 0.9274609802070458, agg = 0.9790143768716426, total_success = 1143526, total_fail = 1232964
Completed 400/1000 runs.
A events = 431835696, B events = 171093533
true = 0.9289819016711329, agg = 0.9801429544467584, total_success = 1103794, total_fail = 1188176
Completed 500/1000 runs.
A events = 518099460, B events = 205328044
true = 0.9289558909582518, agg = 0.9800672821118768, total_success = 1104915, total_fail = 1189416
Completed 600/1000 runs.
A events = 604536545, B events = 239544213
true = 0.9274519293721025, agg = 0.9793509060624628, total_success = 1119121, total_fail = 1206662
Completed 700/1000 runs.
A events = 690771581, B events = 273762460
true = 0.9278703189434415, agg = 0.9792750390075414, total_success = 1132668, total_fail = 1220718
Completed 800/1000 runs.
A events = 777016374, B events = 307969575
true = 0.9293312279258934, agg = 0.9799860254546577, total_success = 1116453, total_fail = 1201351
Completed 900/1000 runs.
A events = 863516473, B events = 342163336
true = 0.9277058134164361, agg = 0.9792171867207045, total_success = 1128838, total_fail = 1216806
Completed 1000/1000 runs.
Mean Bias (Biased Metric - True Metric): 0.051378
Median Bias:                             0.051370
Standard Deviation of Bias:              0.000403
5th Percentile of Bias:                  0.050752
95th Percentile of Bias:                 0.052049
import numpy as np
import random
# --- Simulation Parameters ---
total_duration_days = 30
interval_minutes = 5
# Calculate the number of intervals in the total duration
num_intervals = int((total_duration_days * 24 * 60) / interval_minutes)
num_monte_carlo_runs = 1000 # Number of simulation runs
# Example of a very spiky load
# Probability of being in Regime A for any given interval
prob_regime_A = 0.2
volume_A_k, volume_A_theta = 100, 5 # Mean volume: k*theta = 500 requests per 5 min
success_A_alpha, success_A_beta = (90.0, 10.0) # ~ 10% failure rate
# Regime B: Low Volume, High Error
volume_B_k, volume_B_theta = 10, 5 # Mean volume: k*theta = 50 requests per 5 min
success_B_alpha, success_B_beta = (99.9, 0.1) # 1% failure rate
# --- Run the Monte Carlo Simulation ---
bias_results = []
print(f"Starting Monte Carlo simulation with {num_monte_carlo_runs} runs...")
print(f"Each run simulates {total_duration_days} days, divided into {num_intervals} intervals of {interval_minutes} minutes.")
count_a = 0
count_b = 0
for run in range(num_monte_carlo_runs):
num_requests_total = 0
num_success_total = 0
buckets = []
for i in range(num_intervals):
# Determine which regime this interval is in
current_regime = "A" if random.random() < prob_regime_A else "B"
# Ensure volume is non-negative
# Generate volume based on the current regime
if current_regime == "A":
num_requests = int(np.random.gamma(volume_A_k, volume_A_theta))
count_a += num_requests
success_rate = np.random.beta(success_A_alpha, success_A_beta)
else: # Regime B
num_requests = int(np.random.gamma(volume_B_k, volume_B_theta))
count_b += num_requests
success_rate = np.random.beta(success_B_alpha, success_B_beta)
num_requests = max(1, num_requests)
num_success = int(round(num_requests * success_rate))
num_success = min(num_success, num_requests)
# Calculate the 5m success ratio for this interval (what Prometheus records)
ratio_5m = num_success / num_requests
buckets.append(ratio_5m)
# Accumulate totals for the true overall ratio
num_requests_total += num_requests
num_success_total += num_success
biased_metric = np.sum(buckets) / len(buckets)
true_metric = num_success_total / num_requests_total
bias = biased_metric - true_metric
bias_results.append(bias)
if (run + 1) % 100 == 0:
print(f"A events = {count_a}, B events = {count_b}")
print( f"true = {true_metric}, agg = {biased_metric}, total_success = {num_success_total}, total_fail = {num_requests_total}")
print(f"Completed {run + 1}/{num_monte_carlo_runs} runs.")
mean_bias = np.mean(bias_results)
median_bias = np.median(bias_results)
std_dev_bias = np.std(bias_results)
percentile_5 = np.percentile(bias_results, 5)
percentile_95 = np.percentile(bias_results, 95)
print(f"Mean Bias (Biased Metric - True Metric): {mean_bias:.6f}")
print(f"Median Bias: {median_bias:.6f}")
print(f"Standard Deviation of Bias: {std_dev_bias:.6f}")
print(f"5th Percentile of Bias: {percentile_5:.6f}")
print(f"95th Percentile of Bias: {percentile_95:.6f}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment