Skip to content

Instantly share code, notes, and snippets.

@nickstenning
Last active October 27, 2022 03:52
Show Gist options
  • Save nickstenning/9bbe27b1c65411501572d7b77bc24380 to your computer and use it in GitHub Desktop.
Save nickstenning/9bbe27b1c65411501572d7b77bc24380 to your computer and use it in GitHub Desktop.
import argparse
import math
import textwrap
from datetime import timedelta
# Number of seconds represented by each supported duration suffix.
DURATIONS = {
    "s": 1,
    "m": 60,
    "h": 60 * 60,
    "d": 24 * 60 * 60,
    "w": 7 * 24 * 60 * 60,
}

# Error rates (fractions, so 0.01 means 1%) for which a report row is
# produced, in ascending order.
ERROR_RATES = [
    0.0001, 0.0005,
    0.001, 0.005,
    0.01, 0.05,
    0.1, 0.3, 0.5, 0.9,
    1,
]

# Honeycomb will alert based on the error rate measured over the last 1/4 of
# the configured exhaustion interval.
LOOKBACK_FRACTION = 1 / 4
def duration(s: str) -> int:
    """Convert a duration string into a whole number of seconds.

    The string is an integer count optionally followed by a single unit
    suffix taken from ``DURATIONS`` (``s``, ``m``, ``h``, ``d`` or ``w``,
    case-insensitive). A string with no suffix is read as seconds.

    Args:
        s: Text such as ``"90"``, ``"15m"`` or ``"30d"``.

    Returns:
        The duration in seconds.

    Raises:
        ValueError: If ``s`` is empty, the suffix is not a known unit, or the
            count is not an integer. argparse reports ``ValueError`` from a
            ``type=`` callable as a normal usage error.
    """
    if not s:
        # Indexing s[-1] below would raise IndexError, which argparse does
        # not translate into a usage message.
        raise ValueError("duration must not be empty")

    suffix = s[-1].lower()
    if suffix.isdecimal():
        # No unit suffix at all: the whole string is a count of seconds,
        # regardless of how many digits it has.
        count, unit = s, "s"
    else:
        count, unit = s[:-1], suffix

    if unit not in DURATIONS:
        raise ValueError(f"'{unit}' is not a recognised unit of duration")
    return int(count) * DURATIONS[unit]
# Command-line interface. Percentages are given as plain numbers (99.9 means
# 99.9%); intervals are parsed by duration().
parser = argparse.ArgumentParser()
parser.add_argument(
    "--slo",
    type=float,
    default=99.9,
    help="SLO target as a percentage (default: %(default)s)",
)
parser.add_argument(
    "--slo-interval",
    type=duration,
    default="30d",
    help="length of the SLO window, e.g. 30d or 4w (default: %(default)s)",
)
# float rather than int: the report prints the remaining budget with two
# decimal places and invites the user to pass that number back in here.
parser.add_argument(
    "--starting-budget",
    type=float,
    default=100,
    help="error budget remaining at the start, as a percentage "
    "(default: %(default)s)",
)
parser.add_argument(
    "--exhaustion-interval",
    type=duration,
    default="1d",
    help="burn alert exhaustion interval, e.g. 1d or 4h (default: %(default)s)",
)
def time_to_alert(error_rate, slo, slo_interval, starting_budget, exhaustion_interval):
    """Estimate the number of seconds until a burn alert fires.

    Args:
        error_rate: Observed error rate as a fraction (0.01 means 1%).
        slo: SLO target as a fraction (0.999 means 99.9%).
        slo_interval: Length of the SLO window, in seconds.
        starting_budget: Fraction of the error budget still available.
        exhaustion_interval: Burn alert exhaustion interval, in seconds.

    Returns:
        The time to alert truncated to whole seconds, or ``math.inf`` when
        the computed time is longer than ``exhaustion_interval``.
    """
    # Error rate that would use up the starting budget over the SLO interval.
    budget_rate = starting_budget * (1 - slo)
    # Error rate that would use it up within the exhaustion interval instead.
    exhaustion_rate = budget_rate * (slo_interval / exhaustion_interval)
    # Only LOOKBACK_FRACTION of the exhaustion interval is considered.
    scaled = LOOKBACK_FRACTION * exhaustion_rate * exhaustion_interval
    seconds = scaled / error_rate
    return math.inf if seconds > exhaustion_interval else int(seconds)
def remaining_budget(error_rate, slo, slo_interval, starting_budget, calculation_interval):
    """Return the budget fraction left after ``calculation_interval`` seconds.

    Args:
        error_rate: Observed error rate as a fraction.
        slo: SLO target as a fraction.
        slo_interval: Length of the SLO window, in seconds.
        starting_budget: Fraction of the budget available at the start.
        calculation_interval: Seconds spent at ``error_rate``.

    Returns:
        ``starting_budget`` minus the fraction burned; the result is not
        clamped and may be negative.
    """
    allowed_rate = 1 - slo
    # How many times faster than the SLO allows the errors are arriving.
    burn_multiple = error_rate / allowed_rate
    # Share of the SLO window covered by the calculation interval.
    window_share = calculation_interval / slo_interval
    return starting_budget - burn_multiple * window_share
def duration_to_string(d):
    """Format ``d`` seconds using ``timedelta``'s string form, e.g. ``1 day, 0:00:00``."""
    span = timedelta(seconds=d)
    return str(span)
def when(d):
    """Describe an alert delay: ``"never"`` for infinity, else ``"after <duration>"``."""
    return "never" if d == math.inf else f"after {duration_to_string(d)}"
def main(argv=None):
    """Print a table of burn alert timings for each rate in ``ERROR_RATES``.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the default)
            makes argparse read them from ``sys.argv``.

    Raises:
        SystemExit: Raised by argparse for invalid arguments, including the
            range checks below.
    """
    args = parser.parse_args(argv)

    # These values would otherwise surface as a ZeroDivisionError deep inside
    # the calculations; report them as ordinary usage errors instead.
    if args.slo >= 100:
        parser.error("--slo must be less than 100")
    if args.slo_interval <= 0:
        parser.error("--slo-interval must be greater than zero")
    if args.exhaustion_interval <= 0:
        parser.error("--exhaustion-interval must be greater than zero")

    # Percentages from the command line become fractions for the arithmetic.
    slo = args.slo / 100
    slo_interval = args.slo_interval
    starting_budget = args.starting_budget / 100
    exhaustion_interval = args.exhaustion_interval

    print(f"SLO: {slo:.3%} over {duration_to_string(slo_interval)}")
    print(f"Budget remaining at start: {starting_budget:.2%}")
    print(f"Burn alert exhaustion interval: {duration_to_string(exhaustion_interval)}")
    print()

    # Header columns use the same widths as the data rows so they line up.
    print(f"{'error rate':>12} {'alert fires*':<27} budget remaining**")
    print(f"{'-' * 10:>12} {'-' * 12:<27} {'-' * 18}")
    for e in ERROR_RATES:
        t = time_to_alert(e, slo, slo_interval, starting_budget, exhaustion_interval)
        # When no alert fires (t is infinite) the budget is evaluated at the
        # end of the exhaustion interval.
        b = remaining_budget(e, slo, slo_interval, starting_budget, min(t, exhaustion_interval))
        print(f"{e:12.2%} {when(t).ljust(27)} {b:8.2%}")

    print(textwrap.dedent("""
        *
        'never' is only true if the errors stop completely within the exhaustion
        interval. If they persist then an alert will eventually fire so long as the
        error rate is above the complement of the SLO (i.e. 0.05% for an SLO of
        99.95%) and the alert exhaustion interval is less than or equal to the SLO
        interval.
        **
        'budget remaining' indicates the remaining error budget when the alert
        fires (or, if no alert fires, at the end of the exhaustion interval). You
        can simulate successive periods by plugging this number back into the
        --starting-budget argument."""))
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment