Created
September 7, 2013 20:23
-
-
Save aphyr/6478950 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Probability of a read seeing a collision for various poisson rates | |
{1 1.317905310489196E-7, | |
10 5.749661865748004E-6, | |
100 5.303513799265161E-5, | |
1000 5.090228791739002E-4, | |
10000 0.005040357033134383, | |
100000 0.04925138356052143, | |
1000000 0.4179179486964119} | |
; Probability of observing 1 or more collisions per day, at 1 read per second: | |
{1 0.02, | |
10 0.34, | |
100 0.985, | |
1000 1.0, | |
10000 1.0, | |
100000 1.0, | |
1000000 1.0} | |
; Probability of any given cell being invalid, if it is written to twice by a poisson process with mean arrival rate of 1/s, 10/s, etc. | |
{1 4.1E-7, | |
10 4.93E-6, | |
100 4.994E-5 | |
1000 5.11E-4, | |
10000 0.005026, | |
100000 0.046884, | |
1000000 0.283527} | |
; At 10 writes per second, number of corrupt rows per day in the above system: | |
{1 0.35424, | |
10 4.25952, | |
100 43.14816, | |
1000 441.50399999999996, | |
10000 4342.464, | |
100000 40507.776000000005, | |
1000000 244967.32799999998} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(use 'incanter.distributions | |
'clojure.math.numeric-tower) | |
(defn timestamps | |
"Returns a lazy, infinite sequence of double timestamps with inter-timestamp | |
intervals governed by the given distribution." | |
[dist] | |
(->> dist | |
(partial draw) | |
repeatedly | |
(reductions + 0))) | |
(defn poisson-timestamps | |
"Returns a lazy, infinite sequence of double timestamps for a poisson process | |
with the given mean rate, in seconds." | |
[rate] | |
(timestamps (exponential-distribution rate))) | |
(defn gamma-timestamps | |
[shape rate] | |
(timestamps (gamma-distribution shape rate))) | |
(defn microsecond | |
"Converts a time in seconds to long microseconds." | |
[t] | |
(-> t (* 1000000) long)) | |
(defn collisions | |
"Counts collisions in a sorted timestamp series." | |
[ts] | |
(->> ts | |
(reduce (fn [[cs prev] cur] | |
(if (= prev cur) | |
[(inc cs) ::fencepost] | |
[cs cur])) | |
[0 ::fencepost]) | |
first)) | |
(defn collision-p | |
"Given a sequence of timestamps, estimates the probability of a collision on | |
any given write." | |
[ts] | |
(let [[colls cnt] (reduce (fn [[colls cnt prev] cur] | |
(if (= prev cur) | |
[(inc colls) (inc cnt) cur] | |
[colls (inc cnt) cur])) | |
[0 0 ::fencepost] | |
ts)] | |
(/ colls cnt))) | |
(defn collision-fraction | |
"The fraction of time during which a collision holds, assuming synchronized | |
clocks." | |
[ts] | |
(let [[_ _ bad-time good-time] | |
(reduce (fn [[prev state bad-time good-time] cur] | |
[cur | |
(if (= prev cur) | |
:bad | |
:good) | |
(if (= :bad state) | |
(+ bad-time (- cur prev)) | |
bad-time) | |
(if (= :good state) | |
(+ good-time (- cur prev)) | |
good-time) | |
cur]) | |
[nil nil 0 0] | |
ts)] | |
(/ bad-time (+ bad-time good-time)))) | |
(defn observance-p | |
"Probability of not observing an event with probability p, given n | |
observations:" | |
[n p] | |
(- 1 (expt (- 1 p) n))) | |
(defn explore | |
"Given a timestamp generator function, a reduction function to apply to a | |
timestamp series and a sequence of parameters, returns a map of parameters to | |
estimates of the reduced value over that generated series." | |
[generator reduction params] | |
(->> params | |
(pmap (fn [params] | |
[params (->> (if (sequential? params) | |
(apply generator params) | |
(generator params)) | |
(take 1000000) | |
(map microsecond) | |
reduction | |
double)])) | |
(into {}))) | |
(defn map-vals | |
[f m] | |
(->> m | |
(map (fn [[k v]] [k (f v)])) | |
(into {}))) | |
; Probability of a read seeing a collision for various poisson rates | |
(->> (range 0 7) | |
(map (partial expt 10)) | |
(explore poisson-timestamps collision-fraction) | |
clojure.pprint/pprint) | |
; Probability of observing no collisions in a day, at 1 read per second | |
(->> (range 0 7) | |
(map (partial expt 10)) | |
(explore poisson-timestamps collision-fraction) | |
(map-vals (partial observance-p (* 3600 24))) | |
clojure.pprint/pprint) | |
; Probability of a collision on any given write. | |
(->> (range 0 3) | |
(map (partial expt 10)) | |
(explore poisson-timestamps collision-p) | |
clojure.pprint/pprint) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment