wrobstory · August 29, 2015 14:12
diff --git a/incanter.clj b/incanter.clj
 (require '[clojure.test.check.generators :as gen]
         '[criterium.core :refer [quick-bench]]
         '[incanter.core :refer [$where $fn]])
 ;; Benchmark input: random-ish columns sampled with test.check, 10,000 values each.
 ;; Longs are chosen between 0 and 10; strings are picked from four fixed words.
 (def tenk-longs
   (-> (gen/choose 0 10)
       (gen/sample 10000)))
 (def tenk-strings
   (-> ["foo" "bar" "baz" "qux"]
       gen/elements
       (gen/sample 10000)))

 ;; Incanter dataset with two columns, one per sampled sequence.
 (def test-dataset
   (incanter.core/dataset
     {:tenk-long tenk-longs
      :tenk-str  tenk-strings}))

 ;; Straightforward where/filter on a single col, single value.
 ;; The query map {:tenk-long 2} targets rows whose :tenk-long column is 2;
 ;; quick-bench (Criterium) times the complete $where call against test-dataset.
 (quick-bench ($where {:tenk-long 2} test-dataset))

 Evaluation count : 6 in 6 samples of 1 calls.
             Execution time mean : 9.013762 sec
    Execution time std-deviation : 71.315784 ms
   Execution time lower quantile : 8.910411 sec ( 2.5%)
   Execution time upper quantile : 9.089943 sec (97.5%)
                   Overhead used : 1.796329 ns
 nil

 ;; Same thing, using native Clojure data structures. To be fair, we will realize all values:
 ;; filter returns a lazy seq, so vec forces every element inside the timed expression.
 ;; The set #{2} is used as the predicate, keeping only the elements equal to 2.
 (quick-bench(vec (filter #{2} tenk-longs)))

 WARNING: Final GC required 11.879337012724719 % of runtime
 Evaluation count : 456 in 6 samples of 76 calls.
             Execution time mean : 1.351492 ms
    Execution time std-deviation : 23.400274 µs
   Execution time lower quantile : 1.321761 ms ( 2.5%)
   Execution time upper quantile : 1.378844 ms (97.5%)
                   Overhead used : 1.796329 ns

 Found 1 outliers in 6 samples (16.6667 %)
    low-severe   1 (16.6667 %)
 Variance from outliers : 13.8889 % Variance is moderately inflated by outliers
 nil

 ;; Slightly more complicated predicate, filtering both columns:
 ;; a row passes when its tenk-str value starts with \b and its tenk-long value is 2.
 ;; NOTE(review): $fn presumably binds each listed column of a row to the symbol of
 ;; the same name -- confirm against the Incanter documentation.
 (def filter-expr ($fn [tenk-long tenk-str] 
                    (and (= (first tenk-str) \b) (= tenk-long 2))))
 ;; Time the same $where call as before, now driven by the two-column predicate.
 (quick-bench ($where filter-expr test-dataset))

 Evaluation count : 6 in 6 samples of 1 calls.
             Execution time mean : 9.361352 sec
    Execution time std-deviation : 44.957808 ms
   Execution time lower quantile : 9.293211 sec ( 2.5%)
   Execution time upper quantile : 9.406518 sec (97.5%)
                   Overhead used : 1.796329 ns

 Found 1 outliers in 6 samples (16.6667 %)
    low-severe   1 (16.6667 %)
 Variance from outliers : 13.8889 % Variance is moderately inflated by outliers
 nil

 (defn pred
   "Reducing step for the native two-column filter.

   accum is a pair [strings longs] of the column vectors collected so far;
   row is a pair [string long]. When the string starts with \\b and the long
   equals 2, both values are appended to their respective column vectors;
   otherwise accum is returned untouched."
   [accum row]
   (let [[strs longs] accum
         [s n]        row]
     (if-not (and (= \b (first s)) (= 2 n))
       accum
       [(conj strs s) (conj longs n)])))

 ;; Native counterpart of the two-column filter: zip the columns into [string long]
 ;; rows with (map vector ...) and fold them through pred, starting from two empty
 ;; column vectors.
 (quick-bench (reduce pred [[][]] (map vector tenk-strings tenk-longs)))

 WARNING: Final GC required 11.208964139749341 % of runtime
 Evaluation count : 222 in 6 samples of 37 calls.
             Execution time mean : 2.800566 ms
    Execution time std-deviation : 92.798570 µs
   Execution time lower quantile : 2.707674 ms ( 2.5%)
   Execution time upper quantile : 2.911025 ms (97.5%)
                   Overhead used : 1.796329 ns
 nil
	(require '[clojure.test.check.generators :as gen]
	'[criterium.core :refer [quick-bench]]
	'[incanter.core :refer [$where $fn]])
	;; Benchmark input: random-ish columns sampled with test.check, 10,000 values each.
	;; Longs are chosen between 0 and 10; strings are picked from four fixed words.
	(def tenk-longs
	  (-> (gen/choose 0 10)
	      (gen/sample 10000)))
	(def tenk-strings
	  (-> ["foo" "bar" "baz" "qux"]
	      gen/elements
	      (gen/sample 10000)))

	;; Incanter dataset with two columns, one per sampled sequence.
	(def test-dataset
	  (incanter.core/dataset
	    {:tenk-long tenk-longs
	     :tenk-str  tenk-strings}))

	;; Straightforward where/filter on a single col, single value.
	;; The query map {:tenk-long 2} targets rows whose :tenk-long column is 2;
	;; quick-bench (Criterium) times the complete $where call against test-dataset.
	(quick-bench ($where {:tenk-long 2} test-dataset))

	Evaluation count : 6 in 6 samples of 1 calls.
	Execution time mean : 9.013762 sec
	Execution time std-deviation : 71.315784 ms
	Execution time lower quantile : 8.910411 sec ( 2.5%)
	Execution time upper quantile : 9.089943 sec (97.5%)
	Overhead used : 1.796329 ns
	nil

	;; Same thing, using native Clojure data structures. To be fair, we will realize all values:
	;; filter returns a lazy seq, so vec forces every element inside the timed expression.
	;; The set #{2} is used as the predicate, keeping only the elements equal to 2.
	(quick-bench(vec (filter #{2} tenk-longs)))

	WARNING: Final GC required 11.879337012724719 % of runtime
	Evaluation count : 456 in 6 samples of 76 calls.
	Execution time mean : 1.351492 ms
	Execution time std-deviation : 23.400274 µs
	Execution time lower quantile : 1.321761 ms ( 2.5%)
	Execution time upper quantile : 1.378844 ms (97.5%)
	Overhead used : 1.796329 ns

	Found 1 outliers in 6 samples (16.6667 %)
	low-severe 1 (16.6667 %)
	Variance from outliers : 13.8889 % Variance is moderately inflated by outliers
	nil

	;; Slightly more complicated predicate, filtering both columns:
	;; a row passes when its tenk-str value starts with \b and its tenk-long value is 2.
	;; NOTE(review): $fn presumably binds each listed column of a row to the symbol of
	;; the same name -- confirm against the Incanter documentation.
	(def filter-expr ($fn [tenk-long tenk-str]
	(and (= (first tenk-str) \b) (= tenk-long 2))))
	;; Time the same $where call as before, now driven by the two-column predicate.
	(quick-bench ($where filter-expr test-dataset))

	Evaluation count : 6 in 6 samples of 1 calls.
	Execution time mean : 9.361352 sec
	Execution time std-deviation : 44.957808 ms
	Execution time lower quantile : 9.293211 sec ( 2.5%)
	Execution time upper quantile : 9.406518 sec (97.5%)
	Overhead used : 1.796329 ns

	Found 1 outliers in 6 samples (16.6667 %)
	low-severe 1 (16.6667 %)
	Variance from outliers : 13.8889 % Variance is moderately inflated by outliers
	nil

	(defn pred
	  "Reducing step for the native two-column filter.

	  accum is a pair [strings longs] of the column vectors collected so far;
	  row is a pair [string long]. When the string starts with \\b and the long
	  equals 2, both values are appended to their respective column vectors;
	  otherwise accum is returned untouched."
	  [accum row]
	  (let [[strs longs] accum
	        [s n]        row]
	    (if-not (and (= \b (first s)) (= 2 n))
	      accum
	      [(conj strs s) (conj longs n)])))

	;; Native counterpart of the two-column filter: zip the columns into [string long]
	;; rows with (map vector ...) and fold them through pred, starting from two empty
	;; column vectors.
	(quick-bench (reduce pred [[][]] (map vector tenk-strings tenk-longs)))

	WARNING: Final GC required 11.208964139749341 % of runtime
	Evaluation count : 222 in 6 samples of 37 calls.
	Execution time mean : 2.800566 ms
	Execution time std-deviation : 92.798570 µs
	Execution time lower quantile : 2.707674 ms ( 2.5%)
	Execution time upper quantile : 2.911025 ms (97.5%)
	Overhead used : 1.796329 ns
	nil