Skip to content

Instantly share code, notes, and snippets.

@jmgimeno
Forked from shuaybi/group.clj
Created May 3, 2011 05:43
Show Gist options
  • Save jmgimeno/952861 to your computer and use it in GitHub Desktop.
Save jmgimeno/952861 to your computer and use it in GitHub Desktop.
Multi-level Grouping and Computations
(ns pas.tree
(:use [clojure.pprint :only [pprint]]))
;; Sample lookup table: region name -> vector of country names in that region.
;; get-rec picks a random region key from this map and then a random country
;; from that region's vector.
(def reg-cntry-list
  {"America"     ["USA" "Canada" "Mexico" "Venezuela" "Brazil" "Argentina" "Cuba"]
   "Asia"        ["India" "Pakistan" "Singapore" "China" "Japan" "Sri Lanka" "Malaysia"]
   "Europe"      ["UK" "Germany" "France" "Italy" "Belgium" "Turkey" "Finland"]
   "Middle East" ["Saudi Arabia" "Bahrain" "UAE" "Kuwait" "Yemen" "Qatar" "Iraq"]
   "Africa"      ["Libya" "Tanzania" "South Africa" "Kenya" "Ethiopia" "Morocco" "Zimbabwe"]})
;; Sample lookup table: sector name -> vector of industry names in that sector.
;; get-rec picks a random sector key from this map and then a random industry
;; from that sector's vector.
(def sec-ind-list
  {"Basic Materials" ["Apparel" "Auto Part" "Building" "Packaged"]
   "Consumer Goods"  ["Beverages" "Cigarettes" "Drugs" "Newspapers"]
   "Financial"       ["Life Insurance" "Banking" "Investment" "Funds"]
   "Healthcare"      ["Home care" "Hospitals" "Plans" "Medical"]
   "Industrial"      ["Chemicals" "Cleaning" "Machine" "Lumber"]
   "Services"        ["Advertising" "Broadcasting" "Education" "Publishing"]
   "Technology"      ["Biotechnology" "Computers" "Data Storage" "Electronics"]
   "Utilities"       ["Farm Products" "Electric" "Gas" "Oil"]})
;; Builds one random sample record.
;;   :sec_id  random integer in [0, 1000)
;;   :attr1   a region key drawn from reg-cntry-list
;;   :attr2   a country drawn from that region's vector
;;   :attr3   a sector key drawn from sec-ind-list
;;   :attr4   an industry drawn from that sector's vector
;;   :mv      random floating-point number in [0, 1000000)
(defn get-rec []
  (let [region   (rand-nth (keys reg-cntry-list))
        sector   (rand-nth (keys sec-ind-list))
        sec-id   (rand-int 1000)
        country  (rand-nth (get reg-cntry-list region))
        industry (rand-nth (get sec-ind-list sector))
        mv       (rand 1000000)]
    {:sec_id sec-id
     :attr1  region
     :attr2  country
     :attr3  sector
     :attr4  industry
     :mv     mv}))
;; Sample data set: a lazy sequence of 50 records, each produced by get-rec.
(def data (repeatedly 50 get-rec))
;; Flat aggregation: totals :mv per distinct combination of attribute values.
;;
;;   data   - collection of record maps, each carrying a numeric :mv
;;   attrs  - sequence of key functions (typically keywords) to group by
;;
;; Returns a map from a vector of the attribute values (in the order given by
;; attrs) to the sum of :mv over the records sharing those values.
;;
;; juxt needs at least one function, so (apply juxt attrs) throws on an empty
;; attrs. An empty attrs is treated as "no grouping": every record falls under
;; the key [] and the result is {[] grand-total} (or {} when data is empty).
(defn sum-by [data attrs]
  (let [group-key (if (seq attrs)
                    (apply juxt attrs)
                    (constantly []))
        groups    (group-by group-key data)]
    (into {}
          (for [[k recs] groups]
            [k (reduce + (map :mv recs))]))))
;(println (sum-by data [:attr1 :attr2]))
; tree-grouping
; NOTE(review): `data` is already defined earlier in this file with the same
; expression; this second def rebinds the var to a fresh sequence of 50 random
; records. Presumably a leftover from pasting two snippets together - confirm
; whether both definitions are wanted.
(def data (take 50 (repeatedly get-rec)))
;; Multi-level grouping: builds a tree of :mv subtotals, one level per attribute.
;;
;;   attrs  - sequence of keys to group by, outermost level first
;;   data   - collection of record maps, each carrying a numeric :mv
;;
;; Returns a two-element vector [total tree].
;;   * With no attrs left, total is the sum of :mv over data and tree is
;;     {:children data} (the records themselves are the leaves).
;;   * Otherwise data is grouped by the first attr. Each group is processed
;;     recursively with the remaining attrs, after that attr has been removed
;;     from its records. Every resulting subtree is appended to :children,
;;     tagged with :path [attr value] and :mv (the group's subtotal).
;;     The running total starts at 0.0.
(defn mlg [attrs data]
  (if (empty? attrs)
    (let [total (reduce + (map :mv data))]
      [total {:children data}])
    (let [attr      (first attrs)
          remaining (rest attrs)
          ;; Folds one [value records] group into the [total node] accumulator.
          attach    (fn [[running node] [value rows]]
                      (let [stripped             (map #(dissoc % attr) rows)
                            [sub-total sub-node] (mlg remaining stripped)
                            child                (assoc sub-node
                                                        :path [attr value]
                                                        :mv sub-total)]
                        [(+ running sub-total)
                         (assoc node :children (conj (:children node) child))]))]
      ;; Reduce over the seq of the grouped map so children keep the map's
      ;; iteration order.
      (reduce attach
              [0.0 {:children []}]
              (seq (group-by attr data))))))
(comment
  ;; REPL scratchpad - nothing in this form is evaluated when the file loads.
  (def mydata (repeatedly 10 get-rec))

  ;; No grouping: grand total with the raw records as children.
  (mlg [] mydata)

  ;; One level, then two levels of grouping.
  (mlg [:attr1] mydata)
  (mlg [:attr1 :attr2] mydata)

  ;; NOTE(review): get-rec does not emit an :attr5 key, so the innermost level
  ;; groups every record under nil - confirm this is intentional.
  (mlg [:attr1 :attr2 :attr3 :attr4 :attr5] mydata)
  (mlg [:attr1 :attr2 :attr3 :attr4 :attr5] data)
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment