Last active
June 5, 2019 19:51
-
-
Save timmc/cb12ee4e50bfd7dd75b7697a78550551 to your computer and use it in GitHub Desktop.
Simulating ELB-style hysteresis for a host that exhibits random failures for both the healthcheck and regular requests
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns adhoc.hysteresis) | |
(def iterations
  "Number of healthcheck intervals simulated by each call to `simulate`."
  10000)

(defn simulate
  "Simulate hysteresis in a server's in-service/out-of-service state when
  its healthchecks fail at random.

  Arguments:
    trip         - an in-service server is taken out of service once the
                   `trip` most recent healthchecks have all failed.
    recover      - an out-of-service server is put back in service once the
                   `recover` most recent healthchecks have all passed.
    failure-rate - each healthcheck passes when `(rand)` is greater than
                   this value, so 0 means (almost) never failing and 1 means
                   always failing.

  The server starts in service with an all-healthy history, and the run
  covers `iterations` healthcheck intervals.

  Returns a map with:
    :updown                 - seq of booleans, one per interval; true when
                              the server is in service after that check.
    :uptime                 - fraction of intervals spent in service.
    :effective-failure-rate - uptime multiplied by failure-rate.
    :state-changes          - number of in/out transitions, counted from the
                              initial in-service state.
    :iterations, :trip, :recover, :failure-rate - the settings used."
  [trip recover failure-rate]
  (let [min-window (max trip recover)
        ;; Server up/down state over time
        updown (->> (concat
                     ;; Fake a recent history of being healthy
                     (repeat (dec min-window) true)
                     ;; Lazy seq of future healthcheck results
                     (repeatedly iterations #(< failure-rate (rand))))
                    ;; Chop into rolling windows of healthcheck results;
                    ;; the newest result is always the last element.
                    (partition min-window 1)
                    ;; Statefully map healthchecks into server in/out
                    (reductions
                     (fn new-health [in-service? window]
                       (if in-service?
                         ;; Stay in unless the newest `trip` checks all failed.
                         (not-every? false? (take-last trip window))
                         ;; Come back only when the newest `recover` checks
                         ;; all passed. This must read from the tail of the
                         ;; window: when trip > recover the head holds stale
                         ;; results.
                         (every? true? (take-last recover window))))
                     ;; Initial state: In service
                     true)
                    ;; Drop initial service state
                    (rest))
        ;; Analysis
        up-periods (count (filter identity updown))
        uptime (/ up-periods iterations)
        ;; A transition is any interval whose state differs from the one
        ;; before it; the first interval is compared with the initial
        ;; in-service state.
        state-changes (count (filter true? (map not= (cons true updown) updown)))]
    {:updown updown
     :uptime uptime
     :effective-failure-rate (* uptime failure-rate)
     :state-changes state-changes
     ;; Record settings
     :iterations iterations
     :trip trip
     :recover recover
     :failure-rate failure-rate}))
(defn report
  "Print a short human-readable summary of a `results` map to *out*.

  Reads the :uptime, :state-changes, :iterations and
  :effective-failure-rate entries of `results`. Uptime and effective
  failure rate are coerced to floats and shown with three decimals.
  Returns nil."
  [results]
  (let [{in-service :uptime
         flips      :state-changes
         intervals  :iterations
         error-rate :effective-failure-rate} results]
    ;; Each piece is formatted and printed in turn, so earlier pieces are
    ;; already emitted if a later coercion fails.
    (print (format "Server in service %.03f of the time with %s state changes "
                   (float in-service)
                   flips))
    (print (format "(out of %s healthcheck intervals)\n" intervals))
    (print (format "Effective undiluted error rate: %.03f\n"
                   (float error-rate)))))
(comment
  ;; Sweep the failure rate from 0 to 1 in steps of 0.01 (trip = 2,
  ;; recover = 10) and print one tab-separated row per rate:
  ;; failure rate, uptime, effective failure rate.
  (doseq [failure-rate (range 0 1.001 0.01)
          :let [{rate      :failure-rate
                 in-service :uptime
                 effective :effective-failure-rate} (simulate 2 10 failure-rate)]]
    (printf "%.02f\t%.05f\t%.05f\n"
            (float rate)
            (float in-service)
            (float effective)))

  ;; Plotting command; it reads output.tsv, presumably the rows printed above
  ;; saved to that file:
  ;; gnuplot -e "set terminal svg size 500,400 background rgb 'white'; set xlabel 'Server failure rate'; set xrange [0:1]; set yrange [0:1]; plot 'output.tsv' using 1:2 title 'server uptime', 'output.tsv' using 1:3 title 'effective failure rate'" > output.svg
  )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment