Skip to content

Instantly share code, notes, and snippets.

@jeroenvandijk
Last active October 2, 2015 15:20
Show Gist options
  • Save jeroenvandijk/2c6521b37411bf2a2737 to your computer and use it in GitHub Desktop.
Save jeroenvandijk/2c6521b37411bf2a2737 to your computer and use it in GitHub Desktop.
Incanter transducer prototype
(ns adgoji.incanter.core
(:require [incanter.core :as incanter]))
(defn update-dataset
  "Returns `dataset` with its :column-names replaced by
  (col-fn column-names) and its :rows replaced by (row-fn rows).

  :rows is reset to nil before the new values are associated
  (presumably so the intermediate value does not keep the old rows
  around -- TODO confirm the intent)."
  [col-fn row-fn {:keys [column-names rows] :as dataset}]
  (-> dataset
      (assoc :rows nil)
      (assoc :column-names (col-fn column-names)
             :rows (row-fn rows))))
(defn wrap-with-transducer
  "Turns the transducer `xf` into a one-argument function that applies
  it to a collection via `sequence`."
  [xf]
  #(sequence xf %))
(defn add-rows-transducer
  "Returns `dataset` with the transducer `xf` applied to its rows;
  the column names are left unchanged."
  [xf dataset]
  (let [transform-rows (wrap-with-transducer xf)]
    (update-dataset identity transform-rows dataset)))
(defn nil-resistant
  "Wraps the two-argument function `f` so that a missing operand does
  not reach it: when both arguments are truthy the result is (f a b);
  when only `a` is truthy it is `a`; otherwise it is `b`."
  [f]
  (fn [a b]
    (cond
      (and a b) (f a b)
      a         a
      :else     b)))
(defn $rollup-transducer
  "Returns a transducer that groups the incoming rows (maps) by the
  `group-by` column(s) and summarises the `col-name` column(s) of each
  group. Nothing is emitted while rows are being consumed; on
  completion one merged row per group (group columns plus summarised
  columns) is passed downstream.

  summary-fun - either one of the keywords :max, :min or :sum, or a
                function of two maps (accumulated values, values of the
                next row) that returns the new accumulated map.
  col-name    - a column key or a collection of column keys to summarise.
  group-by    - a column key or a collection of column keys to group on.

  Throws IllegalArgumentException when `summary-fun` is a keyword that
  is not supported."
  [summary-fun col-name group-by]
  ;; First part is almost the same as in Incanter's version
  (let [value-cols (if (coll? col-name) col-name [col-name])
        ;; nil is passed through untouched so that it keeps meaning
        ;; \"no grouping columns\" for select-keys.
        group-cols (if (or (nil? group-by) (coll? group-by))
                     group-by
                     [group-by])
        rollup-fns {:max (nil-resistant max)
                    :min (nil-resistant min)
                    :sum (nil-resistant +)
                    ;; :count (fnil inc 0)
                    ;; TODO mean doesn't work as it needs to know the count AND the sum
                    ;:mean (fn [col-data] (/ (apply + col-data) (count col-data)))
                    }
        rollup-fn (if (keyword? summary-fun)
                    (if-let [f (rollup-fns summary-fun)]
                      (partial merge-with f)
                      ;; Fail here rather than with an obscure NPE inside
                      ;; merge-with on the first key collision.
                      (throw (IllegalArgumentException.
                              (str "Unsupported rollup summary: " summary-fun))))
                    summary-fun)]
    (comp
     (fn [rf]
       (let [reduced-rows (volatile! (transient {}))]
         (fn
           ([] (rf))
           ([result]
            (let [rows (map (fn [[k v]] (conj k v))
                            (persistent! @reduced-rows))]
              ;; Hand the aggregated rows to the downstream step, strip a
              ;; possible `reduced` wrapper (early termination requested
              ;; downstream), then run the downstream completion so that
              ;; stateful transducers after this one can flush.
              (rf (unreduced (rf result rows)))))
           ([result row]
            (let [k (select-keys row group-cols)
                  a (@reduced-rows k)
                  b (select-keys row value-cols)]
              ;; Use transducer here ?
              (vswap! reduced-rows assoc! k (if a (rollup-fn a b) b))
              result)))))
     ;; The completion step above emits all rows as a single collection;
     ;; `cat` splices them into the downstream one row at a time.
     cat)))
(defn $rollup
  "Applies `$rollup-transducer` (built from `summary-fun`, `col-name`
  and `group-by`) to the rows of `dataset`. The column names of the
  result are the group-by column(s) followed by the summarised
  column(s), flattened into a single vector."
  [summary-fun col-name group-by dataset]
  (let [result-columns (->> [group-by col-name] flatten vec)
        rollup-rows    (wrap-with-transducer
                        ($rollup-transducer summary-fun col-name group-by))]
    (update-dataset (constantly result-columns) rollup-rows dataset)))
(comment
  ;; REPL example: compose a row filter with a rollup and run the
  ;; resulting transducer over plain row maps.
  ;; `$where` and `$rollup` both require a dataset argument, so the
  ;; underlying transducers are used directly here.
  (let [xf (comp
            (filter (incanter/query-to-pred {:b {:$gt 10}}))
            ($rollup-transducer :sum [:b :c] [:a]))]
    (into [] xf [{:a 1 :b 10 :c 2 :d 1}
                 {:a 2 :b 11 :c 2 :d 2}
                 {:a 1 :b 12 :c 2 :d 1}
                 {:a 2 :b 13 :c 2 :d 2}]))
  ;; Expected, assuming :$gt means strictly greater than
  ;; (the order of the groups is not guaranteed):
  ;; => [{:a 2, :b 24, :c 4} {:a 1, :b 12, :c 2}]
  )
(defn $where
  "Returns `dataset` with only the rows that satisfy the predicate
  built from `query-map` by `incanter/query-to-pred`; the column names
  are left unchanged."
  [query-map dataset]
  (let [matches?      (incanter/query-to-pred query-map)
        keep-matching (wrap-with-transducer (filter matches?))]
    (update-dataset identity keep-matching dataset)))
(defn dataset
  "Builds an Incanter dataset from the :column-names found in
  `meta-data` (created with no rows), then stores `meta-data` under
  :meta-data and `rows` under :rows."
  [meta-data rows]
  (-> (:column-names meta-data)
      (incanter/dataset [])
      (assoc :meta-data meta-data
             :rows rows)))
(defn update-transducer
  "Returns a transducer that applies `f` to every input before handing
  it to the downstream reducing function."
  [f]
  ;; The mapping transducer from clojure.core has exactly the
  ;; init / completion / step behaviour required here.
  (map f))
(defn add-derived-column
  "Returns `dataset` with `column-name` conj'ed onto its column names
  and with every row extended by that column.

  3-arity: the new value is (f row).
  4-arity: the new value is `f` applied to the values the row holds for
  `from-columns`, in that order (each row is called as a function of
  the column key)."
  ([column-name f dataset]
   (let [append-name (fn [names] (conj names column-name))
         derive-row  (fn [row] (assoc row column-name (f row)))]
     (update-dataset append-name
                     (wrap-with-transducer (update-transducer derive-row))
                     dataset)))
  ([column-name from-columns f dataset]
   (let [row->value (fn [row] (apply f (map row from-columns)))]
     (add-derived-column column-name row->value dataset))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment