Created
May 2, 2011 21:14
-
-
Save shuaybi/952382 to your computer and use it in GitHub Desktop.
Multi-level Grouping and Computations
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns pas.tree | |
(:use [incanter core io charts datasets])) | |
;; Lookup table: region name -> vector of country names in that region.
;; get-rec picks a random key from this map and then a random country from
;; the vector stored under that key.
(def reg-cntry-list
  {"America"     ["USA" "Canada" "Mexico" "Venezuela" "Brazil" "Argentina" "Cuba"]
   "Asia"        ["India" "Pakistan" "Singapore" "China" "Japan" "Sri Lanka" "Malaysia"]
   "Europe"      ["UK" "Germany" "France" "Italy" "Belgium" "Turkey" "Finland"]
   "Middle East" ["Saudi Arabia" "Bahrain" "UAE" "Kuwait" "Yemen" "Qatar" "Iraq"]
   "Africa"      ["Libya" "Tanzania" "South Africa" "Kenya" "Ethiopia" "Morocco" "Zimbabwe"]})
;; Lookup table: sector name -> vector of industry names in that sector.
;; get-rec picks a random key from this map and then a random industry from
;; the vector stored under that key.
(def sec-ind-list
  {"Basic Materials" ["Apparel" "Auto Part" "Building" "Packaged"]
   "Consumer Goods"  ["Beverages" "Cigarettes" "Drugs" "Newspapers"]
   "Financial"       ["Life Insurance" "Banking" "Investment" "Funds"]
   "Healthcare"      ["Home care" "Hospitals" "Plans" "Medical"]
   "Industrial"      ["Chemicals" "Cleaning" "Machine" "Lumber"]
   "Services"        ["Advertising" "Broadcasting" "Education" "Publishing"]
   "Technology"      ["Biotechnology" "Computers" "Data Storage" "Electronics"]
   "Utilities"       ["Farm Products" "Electric" "Gas" "Oil"]})
(defn get-rec
  "Builds one random record as a map with the keys
   :sec_id - random integer in [0, 1000)
   :attr1  - a random key of reg-cntry-list
   :attr2  - a random entry of the vector stored under :attr1's value
   :attr3  - a random key of sec-ind-list
   :attr4  - a random entry of the vector stored under :attr3's value
   :mv     - random floating-point number in [0, 1000000)"
  []
  (let [region   (rand-nth (keys reg-cntry-list))
        sector   (rand-nth (keys sec-ind-list))
        sec-id   (rand-int 1000)
        country  (rand-nth (get reg-cntry-list region))
        industry (rand-nth (get sec-ind-list sector))
        mv       (rand 1000000)]
    {:sec_id sec-id
     :attr1  region
     :attr2  country
     :attr3  sector
     :attr4  industry
     :mv     mv}))
;; Sample data: a lazy sequence of 50 records, each produced by one call to
;; get-rec. The sequence caches its elements once they are realized.
(def data (repeatedly 50 get-rec))
(defn sum-by
  "Groups the records in `data` by the values of `attrs` and totals :mv per
  group.

  data  - a sequence of maps, each carrying a numeric :mv entry.
  attrs - a sequence of functions (typically keywords) applied to every
          record; the vector of their results is the group key.

  Returns a map from group key (a vector with one element per attr) to the
  sum of :mv over the records in that group. Empty `data` yields {}. Empty
  `attrs` puts every record into the single group keyed by [], i.e. the
  grand total."
  [data attrs]
  ;; Build the key by hand rather than with (apply juxt attrs): juxt needs at
  ;; least one function and throws when attrs is empty.
  (let [group-key (fn [rec] (vec (map #(% rec) attrs)))]
    ;; Single pass: accumulate each record's :mv into its group's total.
    (reduce (fn [totals rec]
              (update-in totals [(group-key rec)] (fnil + 0) (:mv rec)))
            {}
            data)))
;; Print the :mv totals grouped by the :attr1 / :attr2 pair.
(-> data
    (sum-by [:attr1 :attr2])
    println)
(defn is-prog
  "Classifies the number sequence `ns` as a progression.

  Returns
    \"AP\"      when all consecutive differences are equal,
    \"GP\"      when it is not an AP and all consecutive ratios are equal,
    \"Neither\" otherwise.

  A sequence with fewer than two terms has no difference or ratio and is
  reported as \"Neither\". A non-arithmetic sequence that would require
  dividing by a zero term is also \"Neither\" instead of raising a
  divide-by-zero error."
  [ns]
  (let [terms (seq ns)
        ;; true when `op` applied to every (next, current) pair gives one value
        steps-equal? (fn [op] (apply = (map op (rest terms) terms)))]
    (cond
      ;; zero or one term: (apply = ()) would throw, so answer directly
      (nil? (next terms))          "Neither"
      (steps-equal? -)             "AP"
      ;; every term except the last is used as a divisor below
      (some zero? (butlast terms)) "Neither"
      (steps-equal? /)             "GP"
      :else                        "Neither")))
;; The generated records converted with Incanter's to-dataset.
(def ds (to-dataset data))
;; Distinct :attr1 values found in data, sorted; nil when there are none.
(def cats
  (->> data
       (map :attr1)
       set
       sort
       seq))
(defn cnt
  "Returns how many items of `data` satisfy (= (k item) v), where `k` is a
  function (typically a keyword) applied to each item."
  [k v data]
  (count
    (for [rec data
          :when (= (k rec) v)]
      rec)))
;; Number of records per :attr1 value, in the same order as cats.
(def cnts (map #(cnt :attr1 % data) cats))
;; Show the records as a table. Reuses ds, the dataset already built from
;; data, instead of converting the same records a second time.
(view ds)
;; Chart the per-category record counts (categories on x, counts on y).
(view (line-chart cats cnts))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment