Last active
October 27, 2022 03:52
-
-
Save nickstenning/9bbe27b1c65411501572d7b77bc24380 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import math | |
import textwrap | |
from datetime import timedelta | |
# Number of seconds represented by each supported duration suffix.
DURATIONS = {
    "s": 1,
    "m": 60,
    "h": 60 * 60,
    "d": 24 * 60 * 60,
    "w": 7 * 24 * 60 * 60,
}

# Error rates tabulated by the report, expressed as fractions (1 == 100%).
ERROR_RATES = [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5, 0.9, 1]

# Honeycomb will alert based on the error rate measured over the last 1/4 of
# the configured exhaustion interval
LOOKBACK_FRACTION = 1 / 4
def duration(s):
    """Convert a duration string such as ``"45"``, ``"15m"`` or ``"30d"`` to seconds.

    The string is an integer count optionally followed by a single unit
    letter from ``DURATIONS`` (case-insensitive). When the string ends in a
    digit there is no unit and the whole value is taken as seconds.

    Raises:
        ValueError: if the string is empty, the unit letter is not
            recognised, or the count is not a valid integer. argparse reports
            ValueError from a ``type=`` callable as a normal usage error.
    """
    if not s:
        # Indexing an empty string would raise IndexError, which argparse
        # does not translate into a usage message.
        raise ValueError("duration must not be empty")

    last = s[-1].lower()
    if last.isdigit():
        # No unit suffix: treat the entire string as a count of seconds, so
        # "10" works the same way "5" always has.
        count, unit = s, "s"
    else:
        count, unit = s[:-1], last

    if unit not in DURATIONS:
        raise ValueError(f"'{unit}' is not a recognised unit of duration")
    return int(count) * DURATIONS[unit]
# Command-line interface. Percentages are given as plain numbers (99.9 means
# 99.9 percent); intervals use the syntax accepted by duration(), e.g. "30d".
parser = argparse.ArgumentParser(
    description=(
        "Tabulate when an SLO burn alert fires, and how much error budget "
        "is left at that point, for a range of sustained error rates."
    ),
)
parser.add_argument(
    "--slo",
    type=float,
    default=99.9,
    help="SLO target as a percentage (default: %(default)s)",
)
parser.add_argument(
    "--slo-interval",
    type=duration,
    default="30d",
    help="period the SLO is measured over (default: %(default)s)",
)
# float rather than int: the report prints the remaining budget with two
# decimals and invites the user to pass that number back in here.
parser.add_argument(
    "--starting-budget",
    type=float,
    default=100,
    help="error budget remaining at the start, as a percentage (default: %(default)s)",
)
parser.add_argument(
    "--exhaustion-interval",
    type=duration,
    default="1d",
    help="exhaustion interval configured on the burn alert (default: %(default)s)",
)
def time_to_alert(error_rate, slo, slo_interval, starting_budget, exhaustion_interval):
    """Estimate how long a sustained error rate takes to trigger the burn alert.

    Args:
        error_rate: sustained error rate as a fraction (0.01 == 1%).
        slo: SLO target as a fraction (0.999 == 99.9%).
        slo_interval: length of the SLO period, in seconds.
        starting_budget: fraction of the error budget still available.
        exhaustion_interval: the alert's exhaustion interval, in seconds.

    Returns:
        Whole seconds until the alert fires, or ``math.inf`` when there are
        no errors or the computed time exceeds the exhaustion interval.
    """
    if error_rate <= 0:
        # Nothing is burning the budget; this also avoids dividing by zero.
        return math.inf

    # Error rate that would use up the starting budget over the SLO interval.
    max_error_rate = starting_budget * (1 - slo)
    # Error rate that would use it up within the exhaustion interval instead.
    max_error_rate_exhaustion = max_error_rate * (slo_interval / exhaustion_interval)
    # Time for the errors accumulated at `error_rate` to equal what the
    # exhausting rate would produce across the alert's lookback window.
    seconds = (LOOKBACK_FRACTION * max_error_rate_exhaustion * exhaustion_interval) / error_rate

    if seconds > exhaustion_interval:
        return math.inf
    return int(seconds)
def remaining_budget(error_rate, slo, slo_interval, starting_budget, calculation_interval):
    """Return the budget fraction left after errors persist for an interval.

    The burn is the error rate expressed as a multiple of the SLO's allowed
    error rate (``1 - slo``), scaled by the share of the SLO interval that
    ``calculation_interval`` covers. It is subtracted from ``starting_budget``.
    """
    allowed_error_rate = 1 - slo
    burn_multiple = error_rate / allowed_error_rate
    share_of_slo_interval = calculation_interval / slo_interval
    return starting_budget - burn_multiple * share_of_slo_interval
def duration_to_string(d):
    """Render ``d`` seconds using ``timedelta``'s string form, e.g. ``1 day, 0:00:00``."""
    span = timedelta(seconds=d)
    return str(span)
def when(d):
    """Describe when the alert fires: ``"after <duration>"``, or ``"never"`` for infinity."""
    if d != math.inf:
        return f"after {duration_to_string(d)}"
    return "never"
def main():
    """Parse the command line and print the burn-alert table with its footnotes.

    For each entry in ``ERROR_RATES`` the table shows when the alert fires
    and the budget remaining at that moment (or at the end of the exhaustion
    interval if it never fires). Invalid argument values end the program
    through ``parser.error`` with a usage message.
    """
    args = parser.parse_args()

    # An SLO of 100% leaves no error budget and a zero-length interval has no
    # rate; both would otherwise surface as a ZeroDivisionError traceback.
    if not 0 <= args.slo < 100:
        parser.error("--slo must be at least 0 and below 100")
    if args.slo_interval <= 0:
        parser.error("--slo-interval must be a positive duration")
    if args.exhaustion_interval <= 0:
        parser.error("--exhaustion-interval must be a positive duration")

    # Percentages from the command line become fractions for the maths.
    slo = args.slo / 100
    slo_interval = args.slo_interval
    starting_budget = args.starting_budget / 100
    exhaustion_interval = args.exhaustion_interval

    print(f"SLO: {slo:.3%} over {duration_to_string(slo_interval)}")
    print(f"Budget remaining at start: {starting_budget:.2%}")
    print(f"Burn alert exhaustion interval: {duration_to_string(exhaustion_interval)}")
    print()
    print(" error rate alert fires* budget remaining**")
    print(" ---------- ------------ ------------------")

    for rate in ERROR_RATES:
        fires_after = time_to_alert(rate, slo, slo_interval, starting_budget, exhaustion_interval)
        # If the alert never fires, report the budget at the end of the
        # exhaustion interval instead.
        elapsed = min(fires_after, exhaustion_interval)
        budget_left = remaining_budget(rate, slo, slo_interval, starting_budget, elapsed)
        print(f"{rate:12.2%} {when(fires_after).ljust(27)} {budget_left:8.2%}")

    print(textwrap.dedent("""
        *
        'never' is only true if the errors stop completely within the exhaustion
        interval. If they persist then an alert will eventually fire so long as the
        error rate is above the complement of the SLO (i.e. 0.05% for an SLO of
        99.95%) and the alert exhaustion interval is less than or equal to the SLO
        interval.
        **
        'budget remaining' indicates the remaining error budget when the alert
        fires (or, if no alert fires, at the end of the exhaustion interval). You
        can simulate successive periods by plugging this number back into the
        --starting-budget argument."""))
# Produce the report only when run as a script, not when imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment