Last active
April 12, 2018 09:12
-
-
Save sbelak/f47e8530770abb956ba7bf489b3d9408 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defn- sanitize-key
  "Turn the option key `k` into text usable as a Python keyword-argument name.

  Takes `(name k)` and replaces every `-` with `_`, so `:n-components`
  becomes \"n_components\"."
  [k]
  (-> k
      name
      (s/replace "-" "_")))
(defn- sanitize-val
  "Render `v` as the source text of a Python value for a keyword argument.

  - keywords are rendered exactly like the string of their name
  - strings are wrapped in double quotes; backslashes, double quotes and
    line breaks are escaped so the value cannot end the literal early
  - `true` / `false` become `True` / `False`, and `nil` becomes `None`
  - anything else (e.g. numbers) is returned untouched and is stringified
    by the caller"
  [v]
  (cond
    (keyword? v) (sanitize-val (name v))
    (string? v)  (format "\"%s\""
                         ;; Escape per character so each replacement is applied once.
                         (s/escape v {\\       "\\\\"
                                      \"       "\\\""
                                      \newline "\\n"
                                      \return  "\\r"}))
    ;; Clojure's printed forms of these values are not valid Python.
    (true? v)    "True"
    (false? v)   "False"
    (nil? v)     "None"
    :else        v))
(defn- ->options-list
  "Render the option map `opts` as Python keyword-argument source text.

  Each entry becomes `key=value`, with the key passed through `sanitize-key`
  and the value through `sanitize-val`; entries are joined with \", \".
  For example `{:n-components 2}` yields \"n_components=2\". An empty map
  yields the empty string."
  [opts]
  (->> opts
       (map (fn [[k v]]
              (str (sanitize-key k) "=" (sanitize-val v))))
       (s/join ", ")))
(def tsne-template
  "Function of `[out-path options in-path]` returning a Python script as a string.

  The script loads `in-path` as comma-delimited text (skipping the first row),
  runs `sklearn.manifold.TSNE(<options>).fit_transform` on it and saves the
  result to `out-path` as comma-delimited text.

  Both paths are emitted as raw Python string literals so that backslashes in
  them are not read as escape sequences. The script is assembled with explicit
  newlines so that re-indenting this file cannot indent the Python code."
  (partial format
           (str "import numpy as np\n"
                "import sklearn.manifold\n"
                "np.savetxt(r\"%s\", sklearn.manifold.TSNE(%s).fit_transform(np.loadtxt(r\"%s\", delimiter=\",\",skiprows=1)), delimiter=\",\")")))
(def hdbscan-template
  "Function of `[out-path options in-path]` returning a Python script as a string.

  The script loads `in-path` as comma-delimited text (skipping the first row),
  runs `hdbscan.HDBSCAN(<options>).fit_predict` on it and saves the result to
  `out-path` as comma-delimited text.

  Both paths are emitted as raw Python string literals so that backslashes in
  them are not read as escape sequences. The script is assembled with explicit
  newlines so that re-indenting this file cannot indent the Python code."
  (partial format
           (str "import numpy as np\n"
                "import hdbscan\n"
                "np.savetxt(r\"%s\", hdbscan.HDBSCAN(%s).fit_predict(np.loadtxt(r\"%s\", delimiter=\",\",skiprows=1)), delimiter=\",\")")))
(defn- runpy
  "Run a Python script generated by `template` over `df` and read back its output.

  Steps:
  1. write `(preprocess columns df)` to a temporary input CSV;
  2. call `template` with the output path, the remaining options rendered by
     `->options-list`, and the input path to obtain the script text;
  3. feed that script through `echo` into `python`;
  4. read the temporary output CSV with `csv/slurp-csv`, using `header` and
     `cast-fns`.

  `opts` keys `:columns`, `:header` and `:cast-fns` are consumed here; every
  other entry is forwarded to the script as a keyword argument.

  Both temporary files are deleted even when a step throws. Returns whatever
  `csv/slurp-csv` returns.
  NOTE(review): if `csv/slurp-csv` is lazy, the result must be realised before
  the files are removed — confirm against the csv namespace in use."
  [template opts df]
  (let [{:keys [columns header cast-fns]} opts
        py-opts (dissoc opts :columns :header :cast-fns)
        in-file (File/createTempFile "in-" ".csv")]
    (try
      (let [out-file (File/createTempFile "out-" ".csv")]
        (try
          (let [in  (.getAbsolutePath in-file)
                out (.getAbsolutePath out-file)]
            (csv/spit-csv in (preprocess columns df))
            (with-programs [python echo]
              (-> (template out (->options-list py-opts) in)
                  (echo {:seq true})
                  python))
            (csv/slurp-csv out :header header :cast-fns cast-fns))
          (finally
            ;; Always clean up, including when python or CSV parsing fails.
            (.delete out-file))))
      (finally
        (.delete in-file)))))
(defn tsne
  "Compute a t-SNE embedding of `df` and merge it back into the rows.

  Runs `tsne-template` through `runpy`, reading the script output with the
  header `[:tsne-0 :tsne-1]` and casting both columns with `csv/->double`.
  Each output row is merged into the row of `df` at the same position, and
  the result is a lazy sequence that ends with the shorter of the two.

  `opts` (default `{}`) is passed to `runpy`: `:columns` goes to `preprocess`,
  and all other entries become keyword arguments of `sklearn.manifold.TSNE`.
  Any `:header` or `:cast-fns` supplied by the caller is overridden."
  ([df]
   (tsne {} df))
  ([opts df]
   (let [run-opts  (assoc opts
                          :header   [:tsne-0 :tsne-1]
                          :cast-fns {:tsne-0 csv/->double
                                     :tsne-1 csv/->double})
         embedding (runpy tsne-template run-opts df)]
     (map merge df embedding))))
(defn hdbscan
  "Cluster the rows of `df` with HDBSCAN and merge a `:label` into each row.

  Runs `hdbscan-template` through `runpy`, reading the script output with the
  header `[:label]`. Each value is cast with `csv/->int`, stringified and
  turned into a keyword. Each output row is merged into the row of `df` at
  the same position, and the result is a lazy sequence that ends with the
  shorter of the two.

  `opts` (default `{}`) is passed to `runpy`: `:columns` goes to `preprocess`,
  and all other entries become keyword arguments of `hdbscan.HDBSCAN`.
  Any `:header` or `:cast-fns` supplied by the caller is overridden."
  ([df]
   (hdbscan {} df))
  ([opts df]
   (let [run-opts (assoc opts
                         :header   [:label]
                         :cast-fns {:label (comp keyword str csv/->int)})
         labels   (runpy hdbscan-template run-opts df)]
     (map merge df labels))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment