Last active
June 26, 2017 22:58
-
-
Save joycex99/7c7a28a69a308dcddbb896276945451b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Zero-argument, memoized function returning the per-column variances of the
;; positive samples produced by (create-dataset), rescaled so that the result
;; has a `mat/length` of 5000.
;;
;; A sample counts as positive when its :label equals [0.0 1.0].
;; `memoize` ensures the dataset is only loaded and processed on the first
;; call; `defonce` keeps that cache alive across namespace reloads.
;;
;; NOTE(review): `mat` / `matstats` are presumably clojure.core.matrix and
;; clojure.core.matrix.stats -- confirm against the ns declaration.
(defonce get-scaled-variances
  (memoize
    (fn []
      (let [positive? (fn [sample] (= (:label sample) [0.0 1.0]))
            pos-rows  (->> (create-dataset)
                           (filter positive?)
                           (map :data))
            pos-data  (mat/matrix pos-rows)
            ;; one variance value per feature column of the positive samples
            variances (mat/matrix (map matstats/variance (mat/columns pos-data)))
            scale     (/ 5000 (mat/length variances))]
        (mat/mul scale variances)))))
(defn add-rand-variance
  "Returns a vector made of the elements of `v` with random noise added.

  Each element of `v` is paired with the element of `scaled-vars` at the same
  position (call it s) and receives noise computed as (- (* 2 (rand s)) s),
  i.e. a value drawn from [-s, s). The result is as long as the shorter of
  the two inputs."
  [v scaled-vars]
  (let [jitter (fn [spread] (- (* 2 (rand spread)) spread))]
    (mapv (fn [x spread] (+ x (jitter spread)))
          v
          scaled-vars)))
(defn augment-train-ds
  "Takes the train dataset and augments its positive examples to move the
  classes toward a 50/50 balance.

  `orig-train` is a collection of maps with :data and :label keys; a sample
  is positive when its :label equals [0.0 1.0]. Every positive sample is
  copied (quot (- #negatives #positives) #positives) times, each copy being
  perturbed with `add-rand-variance` using the vector returned by
  `(get-scaled-variances)`. Because the per-sample count is truncated, the
  result is only approximately balanced.

  Returns a shuffled vector holding the original samples plus the synthetic
  ones. When there are no positive samples, or positives already are at
  least half as numerous as needed for one extra copy each, nothing is
  synthesised and the shuffled original samples are returned."
  [orig-train]
  (let [positive-label [0.0 1.0]
        {train-pos true train-neg false}
        (group-by #(= (:label %) positive-label) orig-train)
        num-pos      (count train-pos)
        num-augments (- (count train-neg) num-pos)
        ;; Guard against dividing by zero when the dataset has no positives.
        augments-per-sample (if (pos? num-pos)
                              (quot num-augments num-pos)
                              0)
        augmented-ds (if (pos? augments-per-sample)
                       ;; Look the variances up once instead of once per sample.
                       (let [scaled-vars (get-scaled-variances)
                             pos-data    (map :data train-pos)]
                         (vec
                           (for [_ (range augments-per-sample)
                                 p pos-data]
                             ;; Use the same label value the positive test
                             ;; above compares against: (= [0 1] [0.0 1.0])
                             ;; is false in Clojure, so integer labels would
                             ;; not be recognised as positive samples.
                             {:data  (add-rand-variance p scaled-vars)
                              :label positive-label})))
                       [])]
    (shuffle (concat orig-train augmented-ds))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment