Created
September 28, 2019 18:34
-
-
Save katox/70c2ffcf014f8fc0776ef99b21e6e21b to your computer and use it in GitHub Desktop.
From elegance to speed (Clojure version)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; | |
;; http://johnj.com/from-elegance-to-speed.html | |
;; | |
;; The Clojure version | |
;; ------------------- | |
(ns smt | |
(:require [net.cgrand.xforms :as x] | |
[criterium.core :as c :refer [quick-bench]])) | |
(set! *unchecked-math* :warn-on-boxed) | |
(def ^:const TIMES-LENGTH (long 1e6)) | |
(defmacro benchmark* [expr-name expr len] | |
`(let [result# (c/quick-benchmark ~expr {}) | |
mean# (double (first (:mean result#)))] | |
(println "Benchmarking:" ~expr-name) | |
(c/report-result result#) | |
(println (format "Processing rate: %.03f MHz" (/ ~len mean# 1e6))))) | |
(defmacro benchmark [expr-name expr] | |
`(benchmark* ~expr-name ~expr ~TIMES-LENGTH)) | |
; The original blog post Clojure code | |
(defn smt-8 [times] | |
(->> times | |
(partition 8 1) | |
(map (juxt identity | |
(comp (partial apply -) | |
(juxt last first)))) | |
(filter (comp (partial > 1000) second)))) | |
(benchmark | |
"Original smt-8 code" | |
(count | |
(let [times (iterate #(+ % (rand-int 1000)) 0)] | |
(smt-8 (take TIMES-LENGTH times))))) | |
; Benchmarking: Original smt-8 code | |
; Evaluation count : 6 in 6 samples of 1 calls. | |
; Execution time mean : 2.273346 sec | |
; Execution time std-deviation : 15.132003 ms | |
; Execution time lower quantile : 2.257620 sec ( 2.5%) | |
; Execution time upper quantile : 2.293020 sec (97.5%) | |
; Overhead used : 8.159543 ns | |
; Processing rate: 0.440 MHz | |
; First steps | |
; ------------ | |
; "The first step is to translate our code as is to Common Lisp. We’re going | |
; to punt entirely on the question of laziness for this post" | |
; So let's look at the lazy randomly increasing sequence | |
(benchmark | |
"Time series lazy seq created by `iterate`" | |
(doall (take TIMES-LENGTH (iterate #(+ % (rand-int 1000)) 0)))) | |
; Evaluation count : 6 in 6 samples of 1 calls. | |
; Execution time mean : 126.536857 ms | |
; Execution time std-deviation : 1.939898 ms | |
; Execution time lower quantile : 124.567926 ms ( 2.5%) | |
; Execution time upper quantile : 128.832302 ms (97.5%) | |
; Overhead used : 7.977976 ns | |
; We can get rid of lazyness and put it all into a vector | |
(defn compute-times [n] | |
(let [n (int n) | |
v (transient [])] | |
(loop [i (int 0) | |
y 0 | |
res v] | |
(if (< i n) | |
(recur (unchecked-inc-int i) | |
(unchecked-add y (long (rand 1000))) | |
(conj! res y)) | |
(persistent! res))))) | |
(benchmark | |
"Time series vector created using `transient`" | |
(doall (compute-times TIMES-LENGTH))) | |
; Evaluation count : 18 in 6 samples of 3 calls. | |
; Execution time mean : 48.965833 ms | |
; Execution time std-deviation : 1.640308 ms | |
; Execution time lower quantile : 46.496836 ms ( 2.5%) | |
; Execution time upper quantile : 50.608152 ms (97.5%) | |
; Overhead used : 8.141903 ns | |
; Or even a simple java array of primitive longs could do | |
(defn acompute-times ^longs [^long n] | |
(let [array (long-array n)] | |
(loop [i (int 0) | |
y 0] | |
(if (< i n) | |
(let [y' (unchecked-add y (long (rand 1000)))] | |
(aset array i y) | |
(recur (unchecked-inc-int i) y')) | |
array)))) | |
(benchmark | |
"Time series java array created using primitives" | |
(doall (acompute-times TIMES-LENGTH))) | |
; Evaluation count : 24 in 6 samples of 4 calls. | |
; Execution time mean : 34.014258 ms | |
; Execution time std-deviation : 461.698023 µs | |
; Execution time lower quantile : 33.445153 ms ( 2.5%) | |
; Execution time upper quantile : 34.505905 ms (97.5%) | |
; Overhead used : 8.140303 ns | |
; Let the original smt-8 iterate over the precomputed array | |
(benchmark | |
"smt-8 iterating over the array" | |
(count | |
(smt-8 (acompute-times TIMES-LENGTH)))) | |
; Benchmarking: smt-8 iterating over the array | |
; Evaluation count : 6 in 6 samples of 1 calls. | |
; Execution time mean : 2.220764 sec | |
; Execution time std-deviation : 22.597689 ms | |
; Execution time lower quantile : 2.190454 sec ( 2.5%) | |
; Execution time upper quantile : 2.246106 sec (97.5%) | |
; Overhead used : 8.159543 ns | |
; Processing rate: 0.450 MHz | |
; With cosmetic changes to avoid the overuse of comp and juxt | |
(defn v2-smt-8 [^longs times] | |
(->> times | |
(partition 8 1) | |
(map (fn [x] (let [f (first x) | |
l (last x)] | |
[x (- l f)]))) | |
(filter (fn [[x diff]] (< diff 1000))))) | |
(benchmark | |
"v2-smt-8 without juxt iterating over the array" | |
(count | |
(v2-smt-8 (acompute-times TIMES-LENGTH)))) | |
; Benchmarking: v2-smt-8 without juxt iterating over the array | |
; Evaluation count : 6 in 6 samples of 1 calls. | |
; Execution time mean : 1.924487 sec | |
; Execution time std-deviation : 13.311011 ms | |
; Execution time lower quantile : 1.910717 sec ( 2.5%) | |
; Execution time upper quantile : 1.939049 sec (97.5%) | |
; Overhead used : 8.159543 ns | |
; Processing rate: 0.520 MHz | |
; But so far the code is still lazy | |
; we can switch to eager by using transducers | |
(defn v3-smt-8 [^longs times] | |
(sequence | |
(comp | |
(x/partition 8 1) | |
(map (fn [x] (let [f (first x) | |
l (last x)] | |
[x (- l f)]))) | |
(filter (fn [[x diff]] (< diff 1000)))) | |
times)) | |
(benchmark | |
"v3-smt-8 simplified iterating over the array" | |
(count | |
(v3-smt-8 (acompute-times TIMES-LENGTH)))) | |
; Benchmarking: v3-smt-8 simplified iterating over the array | |
; Evaluation count : 6 in 6 samples of 1 calls. | |
; Execution time mean : 1.056290 sec | |
; Execution time std-deviation : 11.500128 ms | |
; Execution time lower quantile : 1.040926 sec ( 2.5%) | |
; Execution time upper quantile : 1.067729 sec (97.5%) | |
; Overhead used : 8.159543 ns | |
; Processing rate: 0.947 MHz | |
; It looks that more records are discarded than passed | |
; Let's try to swap `map` and `filter` | |
(defn v4-smt-8 [^longs times] | |
(sequence | |
(comp | |
(x/partition 8 1) | |
(filter (fn [x] (< (- (last x) (first x)) 1000))) | |
(map (fn [x] [x (- (last x) (first x))]))) | |
times)) | |
(benchmark | |
"v4-smt-8 reordered `filter` and `map` iterating over the array" | |
(count | |
(v4-smt-8 (acompute-times TIMES-LENGTH)))) | |
; Benchmarking: v4-smt-8 reordered `filter` and `map` iterating over the array | |
; Evaluation count : 6 in 6 samples of 1 calls. | |
; Execution time mean : 1.039309 sec | |
; Execution time std-deviation : 7.558152 ms | |
; Execution time lower quantile : 1.030284 sec ( 2.5%) | |
; Execution time upper quantile : 1.047647 sec (97.5%) | |
; Overhead used : 8.159543 ns | |
; Processing rate: 0.962 MHz | |
; Let's get rid of boxed int math first | |
(defmacro diff [x] | |
`(unchecked-subtract (long (last ~x)) (long (first ~x)))) | |
(defn v5-smt-8 [^longs times] | |
(sequence | |
(comp | |
(x/partition 8 1) | |
(filter (fn [x] (< (diff x) 1000))) | |
(map (fn [x] [x (diff x)]))) | |
times)) | |
(benchmark | |
"v5-smt-8 using primitive math iterating over the array" | |
(count | |
(v5-smt-8 (acompute-times TIMES-LENGTH)))) | |
; Benchmarking: v5-smt-8 using primitive math iterating over the array | |
; Evaluation count : 6 in 6 samples of 1 calls. | |
; Execution time mean : 1.025017 sec | |
; Execution time std-deviation : 14.802192 ms | |
; Execution time lower quantile : 1.006743 sec ( 2.5%) | |
; Execution time upper quantile : 1.041188 sec (97.5%) | |
; Overhead used : 8.159543 ns | |
; Processing rate: 0.976 MHz | |
; "To improve the algorithm proper, I perform the entire computation in a single | |
; loop statement. I also change the output slightly to show the index for matching | |
; 8-fold events, start and end time, and event duration" | |
; OK, let's do the same low level optimization in clojure | |
(defn second-try [^longs a] | |
(let [n (count a) | |
max (int (- n 7))] | |
(loop [i 0 | |
res (transient [])] | |
(if (< i max) | |
(let [f (aget a i) | |
l (aget a (+ i 7)) | |
diff (unchecked-subtract l f)] | |
(if (< diff 1000) | |
(recur (unchecked-inc-int i) (conj! res {:index i :startval f :endval l :diff diff})) | |
(recur (unchecked-inc-int i) res))) | |
(persistent! res))))) | |
(benchmark | |
"second-try single pass primitive loop function" | |
(count | |
(second-try (acompute-times TIMES-LENGTH)))) | |
; Benchmarking: second-try single pass primitive loop function | |
; Evaluation count : 30 in 6 samples of 5 calls. | |
; Execution time mean : 23.821902 ms | |
; Execution time std-deviation : 331.676247 µs | |
; Execution time lower quantile : 23.518765 ms ( 2.5%) | |
; Execution time upper quantile : 24.198100 ms (97.5%) | |
; Overhead used : 8.013372 ns | |
; Processing rate: 41.978 MHz | |
; let's verify we get the same result | |
(let [times (acompute-times TIMES-LENGTH) | |
original (smt-8 times) | |
functional (v5-smt-8 times) | |
optimized (second-try times) | |
describe-subseq (fn [[v diff]] {:startval (first v) :endval (last v) :diff diff}) | |
results-equal (= (map describe-subseq original) | |
(map describe-subseq functional) | |
(map #(dissoc % :index) optimized))] | |
(println "Computation results equal:" results-equal) | |
results-equal) | |
; => true | |
; In the blog post section "Improvements" measurements are done on the function itself | |
; on a fixed 100x longer precomputed sequence | |
(let [BIG-LENGTH ^:const (* 100 TIMES-LENGTH) | |
times (acompute-times BIG-LENGTH)] | |
(benchmark* | |
"second-try on a huge precomputed array" | |
(count | |
(second-try times)) | |
BIG-LENGTH)) | |
; Benchmarking: second-try on a huge precomputed array | |
; Evaluation count : 6 in 6 samples of 1 calls. | |
; Execution time mean : 167.110500 ms | |
; Execution time std-deviation : 3.097737 ms | |
; Execution time lower quantile : 163.810594 ms ( 2.5%) | |
; Execution time upper quantile : 171.506338 ms (97.5%) | |
; Overhead used : 8.140743 ns | |
; Processing rate: 598.406 MHz |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment