Last active
October 14, 2021 17:43
-
-
Save joinr/74f6c3dc0d179978616101d9f1954536 to your computer and use it in GitHub Desktop.
Exploring datasets and transducers in tech.ml.dataset.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns transducertest | |
(:require [tech.v3.dataset :as ds] | |
[clojure.core.reducers :as r])) | |
(def d
  "Small sample dataset used by the examples in this file: three rows with a
  numeric column :a and a keyword column :b."
  (ds/->dataset {:a [1 2 3]
                 :b [:foo :bar :baz]}))
(defprotocol ITransposable
  "Conversion between a row-oriented and a column-oriented representation
  of the same tabular data."
  (-row-major    [obj] "Returns obj in its row-oriented representation.")
  (-column-major [obj] "Returns obj in its column-oriented representation."))
;; Row-oriented, conj-able view over a dataset.
;;
;;   ds   - the wrapped dataset; its rows come first when the view is
;;          traversed.
;;   rows - map of column-name -> vector of values, accumulating the records
;;          appended through `conj`. Each conj'd record must be a map.
;;
;; NOTE(review): records with differing key sets make the vectors in `rows`
;; unequal in length; how ds/->dataset treats that is not visible here --
;; confirm before relying on it.
(deftype row-view [ds rows]
  clojure.lang.IPersistentCollection
  (count [this]
    ;; Total rows of both halves, i.e. exactly what `seq` traverses.
    (+ (ds/row-count ds)
       (ds/row-count (ds/->dataset rows))))
  (cons [this r]
    ;; Append every value of record `r` to its column, creating the column
    ;; vector the first time a key is seen.
    (row-view. ds
               (reduce-kv (fn [acc k v]
                            (assoc acc k (conj (get acc k []) v)))
                          rows
                          r)))
  (empty [this] (row-view. (empty ds) {}))
  (equiv [this other]
    ;; Two views are equal when they yield the same sequence of row maps.
    (and (instance? row-view other)
         (= (seq this) (seq other))))
  ITransposable
  (-column-major [this]
    (cond
      ;; Nothing in the wrapped dataset: the accumulated rows are the result.
      (zero? (ds/row-count ds)) (ds/->dataset rows)
      ;; Nothing appended: the wrapped dataset is already column-major.
      (empty? rows)             ds
      ;; Both halves hold rows: keep them all, in the order `seq` and reduce
      ;; present them. Assumes both halves share the same column names --
      ;; TODO confirm against ds/concat's handling of mismatched columns.
      :else                     (ds/concat ds (ds/->dataset rows))))
  (-row-major [this] this)
  clojure.lang.Seqable
  (seq [this]
    ;; Outer `seq` makes an empty view yield nil, as the Seqable contract
    ;; requires; a bare `concat` returns a non-nil (truthy) empty lazy seq.
    (seq (concat (ds/mapseq-reader ds)
                 (ds/mapseq-reader (ds/->dataset rows))))))
;; Reduction over a row-view: rows of the wrapped dataset first, then the
;; rows accumulated through conj, each presented as a map.
;;
;; Both arities live in ONE `coll-reduce` form. extend-protocol keys its
;; method map by name, so two separate `(coll-reduce ...)` forms collapse to
;; the last one and the 2-argument arity would be lost.
(extend-protocol
  clojure.core.protocols/CollReduce
  row-view
  (coll-reduce
    ([coll f]
     ;; No init value: ordinary `reduce` semantics over the concatenated
     ;; rows (first row seeds the accumulator; `(f)` only when empty).
     (reduce f (concat (ds/mapseq-reader (.ds coll))
                       (ds/mapseq-reader (ds/->dataset (.rows coll))))))
    ([coll f init]
     ;; The `cat` transducer reduces each half in turn and propagates
     ;; `reduced`, so early termination in the first half also stops the
     ;; second half. `completing` supplies the 1-arity `transduce` calls at
     ;; the end, leaving the result untouched.
     (transduce cat
                (completing f)
                init
                [(ds/mapseq-reader (.ds coll))
                 (ds/mapseq-reader (ds/->dataset (.rows coll)))]))))
;; ITransposable for the non-view inputs accepted by row-major/column-major.
(extend-protocol
  ITransposable
  ;; A dataset is already column-major; its row-major form is a view with no
  ;; appended rows.
  tech.v3.dataset.impl.dataset.Dataset
  (-row-major [this] (row-view. this {}))
  (-column-major [this] this)
  ;; nil stands for "no data": an empty dataset / empty view.
  nil
  (-row-major [this] (row-view. (ds/->dataset {}) {}))
  (-column-major [this] (ds/->dataset {}))
  ;; A small map is handed to ds/->dataset, the same call the row-view uses
  ;; on its own column-name -> values map, so its contents are kept instead
  ;; of being discarded. `{}` still yields an empty dataset / empty view.
  ;; NOTE(review): only PersistentArrayMap is extended; larger hash maps
  ;; take the fallback branches of row-major/column-major.
  clojure.lang.PersistentArrayMap
  (-row-major [this] (row-view. (ds/->dataset this) {}))
  (-column-major [this] (ds/->dataset this)))
(defn row-major
  "Returns a row-oriented form of `coll`.

  When the type of `coll` extends ITransposable the work is delegated to
  `-row-major`. Otherwise a `coll` with a non-empty seq is returned
  unchanged, and anything else throws an ex-info with the message
  \"under construction!\"."
  [coll]
  (cond
    (extends? ITransposable (type coll)) (-row-major coll)
    (seq coll)                           coll
    :else                                (throw (ex-info "under construction!" {}))))
(defn column-major
  "Returns a column-oriented form of `coll`.

  When the type of `coll` extends ITransposable the work is delegated to
  `-column-major`. Otherwise a `coll` with a non-empty seq is returned
  unchanged, and anything else throws an ex-info with the message
  \"under construction!\"."
  [coll]
  (cond
    (extends? ITransposable (type coll)) (-column-major coll)
    (seq coll)                           coll
    :else                                (throw (ex-info "under construction!" {}))))
(defn into-dataset
  "Dataset-flavoured `into`.

  [to]            - returns `to` in column-major form.
  [to from]       - pours the rows of `from` into the row-major form of `to`
                    and returns the result in column-major form.
  [to xform from] - same, passing the rows of `from` through the transducer
                    `xform` on the way in."
  ([to]
   (column-major to))
  ([to from]
   (column-major
    (into (row-major to)
          (row-major from))))
  ([to xform from]
   (column-major
    (into (row-major to)
          xform
          (row-major from)))))
(def +empty-records+
  "Row-major form of an empty dataset; a starting point for `into`."
  (-> {}
      ds/->dataset
      row-major))
;; The long way: transduce the rows of `d` into an empty row view, adding a
;; column :c = 3 * :a to each row, then turn the result back into a dataset.
(column-major
 (transduce (map (fn [row] (assoc row :c (* (:a row) 3))))
            conj
            (empty (row-major d))
            (row-major d)))
;; | :b | :a | :c |
;; |------|---:|---:|
;; | :foo | 1 | 3 |
;; | :bar | 2 | 6 |
;; | :baz | 3 | 9 |
(def xf
  "Transducer that adds key :c, three times the value under :a, to each row
  map."
  (map #(assoc % :c (* (:a %) 3))))
;; Same transformation with plain `into`, starting from the shared empty
;; row view.
(column-major
 (into +empty-records+ xf (row-major d)))
;; | :b | :a | :c |
;; |------|---:|---:|
;; | :foo | 1 | 3 |
;; | :bar | 2 | 6 |
;; | :baz | 3 | 9 |

;; Testing: the equivalent call through into-dataset.
(into-dataset +empty-records+
              xf
              d)
;; | :b | :a | :c |
;; |------|---:|---:|
;; | :foo | 1 | 3 |
;; | :bar | 2 | 6 |
;; | :baz | 3 | 9 |

;; An empty map works as the target too.
(into-dataset {}
              xf
              d)
;; | :b | :a | :c |
;; |------|---:|---:|
;; | :foo | 1 | 3 |
;; | :bar | 2 | 6 |
;; | :baz | 3 | 9 |
;; Build a dataset from generated records.
(column-major
 (into (empty (row-major d))
       (map (fn [i] {:a i :b i :c (* i i)}))
       (range 10)))
;; _unnamed [10 3]:
;; | :a | :b | :c |
;; |---:|---:|---:|
;; | 0 | 0 | 0 |
;; | 1 | 1 | 1 |
;; | 2 | 2 | 4 |
;; ...
;; | 7 | 7 | 49 |
;; | 8 | 8 | 64 |
;; | 9 | 9 | 81 |

;; A row view can itself be the source of a transduce ...
(transduce (map (fn [{:keys [a b c]}] (+ a b c)))
           +
           (into (empty (row-major d))
                 (map (fn [i] {:a i :b i :c (* i i)}))
                 (range 10)))
;;375

;; ... or of an `into`, here collecting the per-row sums in a vector.
(into []
      (map (fn [{:keys [a b c]}] (+ a b c)))
      (into (empty (row-major d))
            (map (fn [i] {:a i :b i :c (* i i)}))
            (range 10)))
;; [0 3 8 15 24 35 48 63 80 99]
;; Spelled out by hand:
;; (column-major (into (row-major (empty d))
;;                     (comp (filter #(= (:b %) :baz))
;;                           (map (fn [r] (assoc r :c "some-value"))))
;;                     (row-major d)))

;; Equivalent, through into-dataset with a composed transducer.
(into-dataset {}
              (comp (filter (fn [row] (= (:b row) :baz)))
                    (map #(assoc % :c "some-value")))
              d)
;; | :b | :a | :c |
;; |------|---:|------------|
;; | :baz | 3 | some-value |

;; Using the seq abstraction instead of a transducer. `(row-major d)` is
;; semantically equivalent to ds/mapseq-reader here.
(into-dataset {}
              (map (fn [r] (assoc r :c "some-value"))
                   (filter #(= (:b %) :baz)
                           (row-major d))))
;; | :b | :a | :c |
;; |------|---:|------------|
;; | :baz | 3 | some-value |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment