Last active
April 3, 2019 22:19
-
-
Save josephwilk/17015a54b9146dec16b047ced2d6afd4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Build environment for this Boot script: the libraries put on the classpath
;; and the directories whose contents are treated as resources.
(set-env!
  :dependencies
  '[;; Boot's own artifacts, marked "provided".
    [boot/core "2.8.2" :scope "provided"]
    [boot/pod "2.8.2" :scope "provided"]
    [boot/base "2.8.2" :scope "provided"]

    ;; Third-party libraries.
    [http-kit "2.2.0"]
    [enlive "1.1.6"]
    [cheshire "5.7.1"]
    [clojure-csv/clojure-csv "2.0.1"]
    [clj-time "0.13.0"]
    [org.clojure/core.match "0.3.0-alpha4"]]

  :resource-paths
  #{"resources" "src"})
;; Default options for the `pom` task: project coordinates and version.
(task-options!
  pom
  {:project 'companies2gdp
   :version "0.0.1-SNAPSHOT"})
(ns boot.user
  (:require
    [cheshire.core :as json]
    [clojure.edn :as edn]
    ;; The functions in this file call clojure.string/split, /replace, /trim
    ;; and /join by their fully-qualified names. Require the namespace
    ;; explicitly so those calls do not depend on some other library having
    ;; loaded it first.
    [clojure.string]
    [net.cgrand.enlive-html :as html]
    [org.httpkit.client :as http-kit]))
(defn parse-int
  "Reads the run of digits (optionally preceded by a minus sign) found at the
  very start of `s` and returns it as a Double -- not an integer, despite the
  name. Anything after that leading run is ignored.

  Returns nil when `s` has no such prefix, or when any other exception is
  raised while parsing (for example when `s` is nil)."
  [s]
  (try
    (let [leading-digits (re-find #"\A-?\d+" s)]
      ;; `leading-digits` is nil when nothing matched; parseDouble then
      ;; throws and the catch below turns that into a nil result.
      (Double/parseDouble leading-digits))
    (catch Exception _
      nil)))
(defn gdp
  "Extract GDP figures for countries.

  Reads \"prices.csv\" from the working directory, splitting it into lines on
  newlines and each line into columns on runs of tab characters. For every
  line it builds a map with:

    :country  column index 1, trimmed
    :gdp      column index 2 with commas removed, parsed by `parse-int` and
              multiplied by 1000000 (presumably the source figures are in
              millions -- confirm against the data file); nil when the cell
              cannot be parsed, e.g. on a header row

  The resulting sequence is written to \"gdp.edn\" with `prn-str`.
  A line with fewer than three columns still raises from `nth`."
  []
  (let [rows (->> (clojure.string/split (slurp "prices.csv") #"\n")
                  (map (fn [line] (clojure.string/split line #"\t+")))
                  (map (fn [cols]
                         {:country (clojure.string/trim (nth cols 1))
                          ;; `some->` stops at the first nil, so an
                          ;; unparseable cell yields :gdp nil instead of a
                          ;; NullPointerException from (* nil 1000000).
                          :gdp (some-> (nth cols 2)
                                       (clojure.string/replace "," "")
                                       (parse-int)
                                       (* 1000000))})))]
    (spit "gdp.edn" (prn-str rows))))
(defn companies
  "Extract companies and profit.

  Reads \"companies.tsv\" from the working directory, splitting it into lines
  on newlines and each line into columns on single tab characters. For every
  line it builds a map with:

    :name   column index 1, as-is
    :price  column index 2 with `$` and `,` removed, parsed by `parse-int`
            and multiplied by 1000000; nil when the cell cannot be parsed,
            e.g. on a header row

  The resulting sequence is written to \"companies.edn\" with `prn-str`.
  A line with fewer than three columns still raises from `nth`."
  []
  (let [rows (map (fn [line] (clojure.string/split line #"\t"))
                  (clojure.string/split (slurp "companies.tsv") #"\n"))
        data (map (fn [row]
                    {:name  (nth row 1)
                     ;; `some->` stops at the first nil, so an unparseable
                     ;; cell yields :price nil instead of a
                     ;; NullPointerException from (* nil 1000000).
                     :price (some-> (nth row 2)
                                    (clojure.string/replace #"\$" "")
                                    (clojure.string/replace #"," "")
                                    (parse-int)
                                    (* 1000000))})
                  rows)]
    (spit "companies.edn" (prn-str data))))
(defn flags
  "Flag lookup.

  Reads \"flags.tsv\" from the working directory, splitting it into lines on
  newlines and each line into columns on single tab characters, and returns a
  map whose keys are the LAST column of each line and whose values are the
  column at index 1. When the same key occurs on several lines the last line
  wins."
  []
  (let [rows (map (fn [line] (clojure.string/split line #"\t"))
                  (clojure.string/split (slurp "flags.tsv") #"\n"))]
    (into {}
          (map (fn [row] [(last row) (nth row 1)]) rows))))
;; Built once, when this file is first loaded, by calling `flags`; `defonce`
;; keeps the existing value if the file is evaluated again.
(defonce ^{:doc "Map returned by `flags`, computed once at load time."}
  flag-lookup
  (flags))
(defn format
  "Renders `data` as text. Each element contributes its :name, a newline, and
  the `flag-lookup` values for the :country of every entry in its :gdp
  collection, concatenated with no separator (a country missing from
  `flag-lookup` contributes nothing). The per-element chunks are joined with
  newlines.

  Note: this definition shadows clojure.core/format in this namespace."
  [data]
  (let [flag-for  (fn [entry] (get flag-lookup (:country entry)))
        render-one (fn [company]
                     (let [flag-run (clojure.string/join (map flag-for (:gdp company)))]
                       (str (:name company) "\n" flag-run)))]
    (clojure.string/join "\n" (map render-one data))))
(defn- outvalued-countries
  "Returns the entries of `prices` whose :gdp is strictly less than the :price
  of `company`, ordered by :gdp from largest to smallest. Entries with a nil
  :gdp are skipped, and nothing is returned when the company has no :price."
  [company prices]
  (let [company-price (:price company)]
    (->> prices
         (filter (fn [{:keys [gdp]}]
                   (and gdp
                        company-price
                        (> company-price gdp))))
         ;; Ascending sort followed by `reverse` (rather than a descending
         ;; comparator) so entries with equal :gdp keep the order the
         ;; original implementation produced.
         (sort-by :gdp)
         reverse)))

(deftask join
  "Combine GDP, companies and flag data.

  Reads \"gdp.edn\" and \"companies.edn\" from the working directory. Each
  company gets a :gdp key holding the countries whose GDP is below the
  company's price (largest GDP first); companies for which no such country
  exists are dropped. The result is rendered with `format` and written to
  \"out.txt\".

  NOTE(review): the body returns whatever `spit` returns, not task
  middleware -- confirm whether this task is ever composed with other tasks."
  []
  (let [prices        (edn/read-string (slurp "gdp.edn"))
        company-rows  (edn/read-string (slurp "companies.edn"))
        data          (->> company-rows
                           (map (fn [company]
                                  (assoc company
                                         :gdp (outvalued-countries company prices))))
                           ;; keep only companies that out-value something
                           (filter (fn [company] (seq (:gdp company)))))]
    (spit "out.txt" (format data))))
;; REPL scratchpad -- nothing in here is evaluated when the file loads.
;; Listed in the order the steps feed each other: `gdp` and `companies`
;; write the .edn files that `join` reads.
(comment
  (gdp)
  (companies)
  (join))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment