Created
March 22, 2014 21:45
-
-
Save kyptin/9714908 to your computer and use it in GitHub Desktop.
Evaluation of different methods for computing variance
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns perftest.core | |
(:require | |
[clojure.java.io :as io])) | |
(defn perftest | |
[] | |
(with-open [writer (io/writer "times.out")] | |
(let [bigseq (range 1e8)] | |
(doall | |
(for [item bigseq] | |
(let [t0 (System/nanoTime) | |
ret (* 2 item) | |
t1 (System/nanoTime) | |
t (- t1 t0)] | |
(.write writer (format "%d\n" t)) | |
ret)))))) | |
(defn times | |
[] | |
(->> "times.out" | |
io/reader | |
line-seq | |
(map #(Long/parseLong %)))) | |
(defn mean | |
[xs] | |
(loop [xs xs | |
n 0 | |
sum 0.0] | |
(if-not (seq xs) | |
(double (/ sum n)) | |
(recur (next xs) | |
(inc n) | |
(+ sum (first xs)))))) | |
(defn variance | |
[xs] | |
(let [n (count xs) | |
mean (mean xs) | |
resids (map #(- % mean) xs) | |
sqr-resids (map #(* % %) resids) | |
sum-sqr-resids (reduce + sqr-resids)] | |
(double (/ sum-sqr-resids (dec n))))) | |
(defn exact | |
[] | |
;; Can't just call `variance` because it retains the head. | |
(let [n (count (times)) | |
mean (mean (times)) | |
resids (map #(- % mean) (times)) | |
sqr-resids (map #(* % %) resids) | |
sum-sqr-resids (reduce + sqr-resids)] | |
(double (/ sum-sqr-resids (dec n))))) | |
(defn welford | |
[] | |
(loop [times (times) | |
n 0 | |
mean 0.0 | |
m2 0.0] | |
(if-not (seq times) | |
(/ m2 (dec n)) | |
(let [x (first times) | |
n' (inc n) | |
delta (- x mean) | |
mean' (+ mean (/ delta n')) | |
m2' (+ m2 (* delta (- x mean')))] | |
(recur (next times) n' mean' m2'))))) | |
(defn chunked | |
[chunk-size] | |
(->> (times) | |
(partition-all chunk-size) | |
(map variance) | |
mean)) | |
(defmacro print-result | |
[method-sym & [arg-val]] | |
`(println (format "%7s | %8s | %10.9e" | |
~(str method-sym) | |
~(if arg-val (str arg-val) "") | |
(if ~arg-val (~method-sym ~arg-val) (~method-sym))))) | |
(defn -main | |
[] | |
(perftest) ;; this step takes a while | |
(println (format "%7s | %8s | %8s" | |
"method" | |
"param" | |
"variance")) | |
(print-result exact) | |
(print-result welford) | |
(print-result chunked 1000) | |
(print-result chunked 10000) | |
(print-result chunked 100000) | |
(print-result chunked 1000000)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Some results: