Created
June 14, 2013 13:01
-
-
Save Velrok/5781630 to your computer and use it in GitHub Desktop.
A collection of methods related to incanter datasets that I found myself writing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns athena.incanter-extensions | |
(:use [incanter.core :only [head $ $where nrow dataset? to-dataset]]) | |
(:import [java.lang Math])) | |
(defn tail
  "Like incanter.core/head but returning the tail.

  ([mat])     selects the last 10 rows of `mat`.
  ([len mat]) selects the last `len` rows of `mat`.

  When `len` is larger than the number of rows the selection starts at
  row 0 rather than at a negative row index."
  ([mat] (tail 10 mat))
  ([len mat]
   (let [upper-bound (nrow mat)
         ;; Clamp the first row index to 0 so that asking for more rows
         ;; than `mat` has never produces negative indices.
         lower-bound (max 0 (- upper-bound len))]
     ($ (range lower-bound upper-bound) :all mat))))
(defn partition-dataset
  "Dataset counterpart of clojure.core/partition.

  Cuts the rows of `ds` into consecutive groups of `n` via `partition` and
  converts every group back into a dataset with `to-dataset`.

  Preconditions: `ds` must satisfy `dataset?` and `n` must be a number."
  [n ds]
  {:pre [(dataset? ds)
         (number? n)]}
  (let [row-groups (partition n (:rows ds))]
    (map to-dataset row-groups)))
(defn split-dataset-by
  "Splits `data-set` into `n-fractions` datasets based on the values found in
  column `col-key`.

  The distinct values of the column are taken in order of first appearance
  and chunked into groups of ceil(distinct-count / n-fractions) values (the
  last group may be smaller). One dataset is returned per group, holding
  the rows whose `col-key` value belongs to that group, so rows sharing a
  value always land in the same result dataset.

  Preconditions: `col-key` is a keyword, `n-fractions` is a number and
  `data-set` satisfies `dataset?`.
  Postcondition: exactly `n-fractions` datasets are returned. The assertion
  fails when the chunking cannot produce that many groups, e.g. 4 distinct
  values with 3 fractions give a chunk size of 2 and therefore only 2 groups."
  [col-key n-fractions data-set]
  {:pre [(keyword? col-key)
         (number? n-fractions)
         (dataset? data-set)]
   :post [(= n-fractions
             (count %))]}
  (let [;; `distinct` keeps first-appearance order, so the grouping is
        ;; reproducible; iterating a hash set would make both the group
        ;; membership and the result order depend on hashing.
        uniq-col-values (distinct ($ col-key data-set))
        partition-size (int (Math/ceil (/ (count uniq-col-values)
                                          n-fractions)))
        ;; The empty pad collection keeps a trailing short group instead
        ;; of dropping it.
        partitions (partition partition-size
                              partition-size
                              []
                              uniq-col-values)]
    (for [part partitions]
      ($where {col-key {:$in (set part)}}
              data-set))))
(defn merge-datasets
  "Combines several datasets into a single one.

  Collects the `:rows` of every element of `data-sets`, flattens them into
  one sequence and hands that sequence to `to-dataset`.

  Precondition: `data-sets` must be non-empty (`(seq data-sets)` has to
  yield a seq).
  Postcondition: the result satisfies `dataset?`."
  [data-sets]
  {:pre [(seq? (seq data-sets))]
   :post [(dataset? %)]}
  (let [row-groups (map :rows data-sets)
        all-rows (flatten row-groups)]
    (to-dataset all-rows)))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns athena.incanter-extensions-spec | |
(:use [athena.incanter-extensions] | |
[incanter core] | |
[speclj.core])) | |
;; Five input rows split into groups of two: two full groups are expected,
;; and the fifth row appears in neither of them.
(describe "partition-dataset"

  (with ds
    (to-dataset [{:a 1 :b 0} {:a 2 :b 9} {:a 3 :b 8} {:a 4 :b 7} {:a 5 :b 6}]))

  (with expectation
    [(to-dataset [{:a 1 :b 0} {:a 2 :b 9}])
     (to-dataset [{:a 3 :b 8} {:a 4 :b 7}])])

  (it "works like partition but on a dataset"
    (should= @expectation (partition-dataset 2 @ds))))
;; Column :a holds five distinct values; splitting into three fractions is
;; expected to group them as {1 2}, {3 4} and {5}, keeping all rows that
;; share an :a value together.
(describe "split-dataset-by"

  (with ds
    (to-dataset [{:a 1 :b 0} {:a 1 :b 1}
                 {:a 2 :b 2} {:a 2 :b 3}
                 {:a 3 :b 4} {:a 3 :b 5}
                 {:a 4 :b 6}
                 {:a 5 :b 7}]))

  (with expectation
    [(to-dataset [{:a 1 :b 0} {:a 1 :b 1} {:a 2 :b 2} {:a 2 :b 3}])
     (to-dataset [{:a 3 :b 4} {:a 3 :b 5} {:a 4 :b 6}])
     (to-dataset [{:a 5 :b 7}])])

  (it "retuns a seq of exactly the number of sets requests"
    ;; The lazy result is poured into a vector before comparing it with
    ;; the expected vector.
    (should= @expectation
             (into [] (split-dataset-by :a 3 @ds)))))
;; Two identical two-row datasets go in; one four-row dataset with the rows
;; in input order is expected to come out.
(describe "merge-datasets"

  (with input
    [(to-dataset [{:a 1 :b 2} {:a 2 :b 2}])
     (to-dataset [{:a 1 :b 2} {:a 2 :b 2}])])

  (with expected
    (to-dataset [{:a 1 :b 2} {:a 2 :b 2} {:a 1 :b 2} {:a 2 :b 2}]))

  (it "merges a seq of data sets into one"
    (should= @expected (merge-datasets @input))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Since version 1.5.0, incanter comes with its own tail implementation.