Created
July 20, 2020 16:00
-
-
Save harold/7335b78606f8e962f2b385f1ed79d15c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns parallel-sum.core
  (:require [clojure.pprint]
            [tech.ml.dataset :as ds]
            [tech.v2.datatype :as dt]
            [tech.v2.datatype.functional :as dfn]))
(defn- produce-data-csv!
  "Writes one million random rows to ./data.csv.

  Each row is a map with two string keys:
    \"letter\" - an integer in 65..89 (65 plus `(rand-int 25)`, i.e. 0..24)
    \"value\"  - an integer in 0..999"
  []
  (let [random-row (fn []
                     {"letter" (+ 65 (rand-int 25))
                      "value"  (rand-int 1000)})]
    (-> (repeatedly 1000000 random-row)
        (ds/->dataset)
        (ds/write-csv! "./data.csv"))))
(defn- profile-sums!
  "Loads ./data.csv and prints the per-letter sum of the `value` column computed
  two ways, each wrapped in `time` so the elapsed wall-clock time is printed
  after the result:

    Naive   - a plain `reduce` over the dataset's row maps.
    Dataset - `ds/group-by-column` followed by a column-wise `dfn/reduce-+`.

  Both results are sorted by letter and pretty-printed. Note that the timed
  region includes the pretty-printing itself. Returns nil."
  []
  ;; The local is named `dataset` rather than `ds` so it is not confused with
  ;; the `ds` namespace alias used in the calls below.
  (let [dataset    (ds/->dataset "./data.csv")
        mapseq     (ds/mapseq-reader dataset)
        ;; `+` that treats a missing (nil) accumulator entry as 0.
        safe-adder (fnil + 0)]
    (println "Naive:")
    (time
     (clojure.pprint/pprint
      (->> (reduce (fn [eax {:strs [letter value]}]
                     (update eax letter safe-adder value))
                   {}
                   mapseq)
           (sort-by first))))
    (println "Dataset:")
    (time
     (clojure.pprint/pprint
      (->> (ds/group-by-column "letter" dataset)
           (map (fn [[letter letter-ds]]
                  ;; NOTE(review): presumably the cast to :int64 widens the
                  ;; parsed column so the sum cannot overflow a narrower
                  ;; inferred integer type - confirm against the parser.
                  (let [value-col (dt/set-datatype (letter-ds "value") :int64)]
                    [letter (dfn/reduce-+ value-col)])))
           (sort-by first))))))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Leiningen project definition for the parallel-sum benchmark.
(defproject parallel-sum "0.1.0-SNAPSHOT"
  ;; The description and URL replace the unfilled Leiningen template
  ;; placeholders ("FIXME: write description", "http://example.com/FIXME").
  :description "Benchmark of per-group sums: naive reduce vs. tech.ml.dataset group-by"
  :url "https://gist.github.com/harold/7335b78606f8e962f2b385f1ed79d15c"
  :license {:name "EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0"
            :url "https://www.eclipse.org/legal/epl-2.0/"}
  :dependencies [[org.clojure/clojure "1.10.1"]
                 [techascent/tech.ml.dataset "3.07"]]
  ;; Start REPL sessions in the namespace that holds the benchmark functions.
  :repl-options {:init-ns parallel-sum.core})
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
parallel-sum.core> (produce-data-csv!) | |
nil | |
parallel-sum.core> (ds/->dataset "./data.csv") | |
./data.csv [1000000 2]: | |
| letter | value | | |
|--------|-------| | |
| 88 | 266 | | |
| 75 | 444 | | |
| 65 | 982 | | |
| 76 | 494 | | |
| 89 | 492 | | |
| 71 | 957 | | |
| 79 | 427 | | |
| 79 | 975 | | |
| 69 | 371 | | |
| 83 | 121 | | |
| 77 | 282 | | |
| 69 | 514 | | |
| 82 | 732 | | |
| 82 | 714 | | |
| 69 | 673 | | |
| 74 | 62 | | |
| 68 | 964 | | |
| 72 | 456 | | |
| 74 | 138 | | |
| 77 | 932 | | |
| 74 | 737 | | |
| 73 | 11 | | |
| 76 | 91 | | |
| 89 | 497 | | |
| 79 | 581 | | |
parallel-sum.core> (profile-sums!) | |
Naive: | |
([65 20103937] | |
[66 19949511] | |
[67 19887138] | |
[68 19687570] | |
[69 20099229] | |
[70 19826730] | |
[71 20090122] | |
[72 20137840] | |
[73 19921709] | |
[74 19915853] | |
[75 19777532] | |
[76 19958228] | |
[77 19925723] | |
[78 20073555] | |
[79 20020942] | |
[80 20126577] | |
[81 19983156] | |
[82 20227374] | |
[83 19928603] | |
[84 20018642] | |
[85 19825865] | |
[86 20142185] | |
[87 20110942] | |
[88 19800280] | |
[89 19804021]) | |
"Elapsed time: 881.913549 msecs" | |
Dataset: | |
([65 20103937] | |
[66 19949511] | |
[67 19887138] | |
[68 19687570] | |
[69 20099229] | |
[70 19826730] | |
[71 20090122] | |
[72 20137840] | |
[73 19921709] | |
[74 19915853] | |
[75 19777532] | |
[76 19958228] | |
[77 19925723] | |
[78 20073555] | |
[79 20020942] | |
[80 20126577] | |
[81 19983156] | |
[82 20227374] | |
[83 19928603] | |
[84 20018642] | |
[85 19825865] | |
[86 20142185] | |
[87 20110942] | |
[88 19800280] | |
[89 19804021]) | |
"Elapsed time: 48.433772 msecs" | |
nil |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment