Last active
April 15, 2022 09:53
-
-
Save evilsneer/bfe33e7a58941bddb100a4bdd1991f80 to your computer and use it in GitHub Desktop.
bb csv count distinct fields
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(require '[clojure.data.csv :as csv] | |
'[clojure.tools.cli :refer [parse-opts]]) | |
;; Option specs handed to `parse-opts`: one vector per option, in the
;; form [short-flag long-flag description & keyword-options].
(def cli-options
  (let [;; -f / --file takes a required argument (the FILE placeholder);
        ;; the value is checked with `string?`.
        file-option ["-f" "--file FILE" "File path"
                     :validate [string? "Must be a string"]]
        ;; -h / --help is a bare flag with no description.
        help-option ["-h" "--help"]]
    [file-option help-option]))
;; Value of the -f/--file option parsed from the script's command-line
;; arguments; nil when the parsed :options map has no :file entry.
(def file
  (get-in (parse-opts *command-line-args* cli-options)
          [:options :file]))
(defn csv-data->maps
  "Converts a seq of CSV rows into a lazy seq of maps.

  The first row is treated as the header: each of its cells is turned
  into a keyword and used as a key. Every following row is zipped
  against those keys, so a row shorter than the header yields a map
  with fewer entries."
  [csv-data]
  (let [header-keys (map keyword (first csv-data))
        body-rows   (rest csv-data)]
    (map (fn [row] (zipmap header-keys row)) body-rows)))
;; NOTE: this name shadows `clojure.core/read` in the script's namespace;
;; it is kept so existing callers keep working.
(defn read
  "Loads a delimited text file and returns its rows as a seq of maps
  keyed by the keywordized header row (see `csv-data->maps`).

  filename  - anything `slurp` accepts; the whole file is read into
              memory before parsing.
  separator - optional field-delimiter character passed to
              `csv/read-csv`; defaults to \\tab, the delimiter the
              one-argument form has always used."
  ([filename]
   (read filename \tab))
  ([filename separator]
   (csv-data->maps
    (csv/read-csv (slurp filename) :separator separator))))
(defn counts
  "Prints `filename`, loads its rows with `read`, and returns a seq of
  [column-key distinct-value-count] pairs, one pair per key of the
  first row map. The per-column counting is dispatched through `pmap`."
  [filename]
  (println filename)
  (let [rows           (read filename)
        columns        (keys (first rows))
        ;; Number of different values found under `column` across all rows.
        distinct-count (fn [column]
                         (count (set (map column rows))))]
    (pmap (fn [column] [column (distinct-count column)])
          columns)))
;; Script entry point: run the per-column distinct count on the file named
;; by the command-line options. The value is whatever `counts` returns;
;; nothing on this line prints it explicitly.
(-> file counts)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment