-
-
Save jmgimeno/952861 to your computer and use it in GitHub Desktop.
Multi-level Grouping and Computations
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns pas.tree | |
(:use [clojure.pprint :only [pprint]])) | |
;; Lookup table: region name -> vector of country names in that region.
;; Used by get-rec to draw a random region (:attr1) and a country
;; belonging to it (:attr2).
(def reg-cntry-list
  {"America"     ["USA" "Canada" "Mexico" "Venezuela" "Brazil" "Argentina" "Cuba"]
   "Asia"        ["India" "Pakistan" "Singapore" "China" "Japan" "Sri Lanka" "Malaysia"]
   "Europe"      ["UK" "Germany" "France" "Italy" "Belgium" "Turkey" "Finland"]
   "Middle East" ["Saudi Arabia" "Bahrain" "UAE" "Kuwait" "Yemen" "Qatar" "Iraq"]
   "Africa"      ["Libya" "Tanzania" "South Africa" "Kenya" "Ethiopia" "Morocco" "Zimbabwe"]})
;; Lookup table: sector name -> vector of industry names in that sector.
;; Used by get-rec to draw a random sector (:attr3) and an industry
;; belonging to it (:attr4).
(def sec-ind-list
  {"Basic Materials" ["Apparel" "Auto Part" "Building" "Packaged"]
   "Consumer Goods"  ["Beverages" "Cigarettes" "Drugs" "Newspapers"]
   "Financial"       ["Life Insurance" "Banking" "Investment" "Funds"]
   "Healthcare"      ["Home care" "Hospitals" "Plans" "Medical"]
   "Industrial"      ["Chemicals" "Cleaning" "Machine" "Lumber"]
   "Services"        ["Advertising" "Broadcasting" "Education" "Publishing"]
   "Technology"      ["Biotechnology" "Computers" "Data Storage" "Electronics"]
   "Utilities"       ["Farm Products" "Electric" "Gas" "Oil"]})
(defn get-rec
  "Returns one randomly generated record as a map with keys:
     :sec_id  random integer in [0, 1000)
     :attr1   a random region taken from reg-cntry-list
     :attr2   a random country belonging to that region
     :attr3   a random sector taken from sec-ind-list
     :attr4   a random industry belonging to that sector
     :mv      random floating-point value in [0, 1000000)"
  []
  ;; Pick a whole [key values] entry at once so the dependent attribute
  ;; can be drawn from the matching vector without a second lookup.
  (let [[region countries]  (rand-nth (seq reg-cntry-list))
        [sector industries] (rand-nth (seq sec-ind-list))]
    {:sec_id (rand-int 1000)
     :attr1  region
     :attr2  (rand-nth countries)
     :attr3  sector
     :attr4  (rand-nth industries)
     :mv     (rand 1000000)}))
;; Sample dataset: a lazy sequence of 50 records, each produced by get-rec.
(def data (repeatedly 50 get-rec))
(defn sum-by
  "Groups the records in `data` by the values of `attrs` and sums :mv
  within each group.

  data  - a collection of maps, each carrying a numeric :mv entry
  attrs - a sequence of functions (typically keywords) applied to each
          record to build its group key

  Returns a map from group key (a vector holding one value per attr, in
  the order given) to the sum of :mv over the records in that group.
  With empty `attrs` every record falls into the single group keyed by
  [], i.e. the grand total. With empty `data` the result is {}."
  [data attrs]
  (let [;; Build the key by hand rather than with (apply juxt attrs):
        ;; juxt needs at least one function and would throw on empty attrs.
        group-key (fn [rec] (vec (map #(% rec) attrs)))
        grouped   (group-by group-key data)]
    (into {}
          (for [[k recs] grouped]
            [k (reduce + (map :mv recs))]))))
;(println (sum-by data [:attr1 :attr2])) | |
;; ---- tree-grouping ----
;; The `data` var defined earlier in this file is reused from here on.
;; It is deliberately not redefined: a second (def data ...) would silently
;; swap in a different random dataset, so results computed before and after
;; this point would no longer refer to the same records.
(defn mlg
  "Multi-level grouping of `data` by the successive keys in `attrs`.

  attrs - sequence of keys to group by, outermost level first
  data  - collection of record maps, each with a numeric :mv entry

  Returns a pair [total tree]:
    total - sum of :mv over all records in `data`
    tree  - {:children [...]}. When `attrs` is empty, :children is `data`
            itself. Otherwise it is a vector with one node per distinct
            value of the first attr; each node is the subtree for that
            value (built from the remaining attrs) extended with
              :path [attr value]
              :mv   the subtree's total
  The grouping key is removed from every record before it is passed down
  to the next level."
  [attrs data]
  (if-let [[attr & deeper] (seq attrs)]
    (let [strip  (fn [rec] (dissoc rec attr))
          ;; Reducing step: fold one [value members] group into the
          ;; running [total node] accumulator.
          attach (fn [[total node] [value members]]
                   (let [[sub-total sub-node] (mlg deeper (map strip members))]
                     [(+ total sub-total)
                      (update-in node [:children] conj
                                 (assoc sub-node
                                        :path [attr value]
                                        :mv sub-total))]))]
      (reduce attach
              [0.0 {:children []}]
              (group-by attr data)))
    ;; No attrs left: this is a leaf level holding the raw records.
    [(reduce + (map :mv data)) {:children data}]))
;; REPL scratchpad: none of the forms inside (comment ...) are evaluated
;; when the file is loaded; evaluate them one at a time by hand.
(comment
  ;; a small sample that is easier to inspect than the 50-record `data`
  (def mydata (repeatedly 10 get-rec))

  ;; no grouping: the total plus the raw records
  (mlg [] mydata)

  ;; one and two grouping levels
  (mlg [:attr1] mydata)
  (mlg [:attr1 :attr2] mydata)

  ;; every attribute; :attr5 is not a key that get-rec produces, so the
  ;; innermost level groups all remaining records under the value nil
  (mlg [:attr1 :attr2 :attr3 :attr4 :attr5] mydata)
  (mlg [:attr1 :attr2 :attr3 :attr4 :attr5] data)
  )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment