Created
June 10, 2016 15:27
-
-
Save sir-pinecone/10728b5ed49335d353af30c29897ff52 to your computer and use it in GitHub Desktop.
A very simple csv parser in Clojure
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defn lazy-file-lines
  "Returns a lazy sequence of the lines read from `file`.

  `file` is anything accepted by `clojure.java.io/reader` (path string,
  File, Reader, ...). The underlying reader is closed once the last line
  has been consumed, or when reading a line fails. It is NOT closed if the
  caller abandons the sequence before reaching the end.

  Returns nil when the reader cannot be opened (best-effort behaviour).
  Exceptions raised while reading lines surface when the sequence is
  realized, after the reader has been closed."
  [file]
  (try
    (letfn [(helper [^java.io.BufferedReader rdr]
              (lazy-seq
                (if-let [line (try
                                (.readLine rdr)
                                (catch Exception e
                                  ;; Do not leak the file handle when a
                                  ;; read fails part-way through.
                                  (.close rdr)
                                  (throw e)))]
                  (cons line (helper rdr))
                  ;; End of input: release the reader and end the seq.
                  (do (.close rdr) nil))))]
      (helper (clojure.java.io/reader file)))
    ;; Only failures while opening the reader land here; the lazy body
    ;; runs later, outside of this try.
    (catch Exception e nil)))
(defn csv-parse
  "Parses one CSV line `s` into a map keyed by `headings`.

  Each entry of `headings` is either a key, or a vector `[key transform]`
  where `transform` is a one-argument function applied to the field text.
  Fields without an explicit transform are kept as-is, except that empty
  strings become nil. Fields are trimmed and one pair of surrounding
  double quotes is removed. Extra fields or extra headings are dropped
  (the pairing stops at the shorter of the two)."
  [headings s]
  ;; NOTE: that regex is not perfect and can fail on some input
  (->> (clojure.string/split s #"(?!\B\"[^\"]*),(?![^\"]*\"\B)")
       (map clojure.string/trim)
       (map (fn [field]
              ;; Require at least two characters so that a lone `"` is not
              ;; treated as both the opening and the closing quote.
              (if (and (> (count field) 1)
                       (= \" (first field))
                       (= \" (last field)))
                (subs field 1 (dec (count field)))
                field)))
       (zipmap headings)
       (reduce-kv
         (fn [m k v]
           ;; A bare key gets the default transform: empty string -> nil.
           (let [[k transform] (if (vector? k)
                                 k
                                 [k #(if (seq %) %)])]
             (assoc m k (transform v))))
         {})))
;; Column layout for the credit-card CSV export, in file order. Each entry
;; is either a plain key or a [key transform] pair as consumed by csv-parse.
(def cc-headings
  [:date
   :posted
   ;; Amount: strips a leading "$", or a "(" + one following character and
   ;; the trailing ")" (e.g. "($5.00)"), then parses the rest as a double.
   ;; No sign is applied to the parenthesised form. Blank or non-string
   ;; input yields 0.0. Parsing with Double keeps the result type
   ;; consistent with the 0.0 fallback and avoids single-precision loss.
   [:amount (fn [raw]
              (if (and (string? raw) (seq raw))
                (let [first-char (subs raw 0 1)
                      n (case first-char
                          "$" (subs raw 1)
                          "(" (subs raw 2 (dec (count raw))) ; credit type
                          raw)]
                  (Double/parseDouble n))
                0.0))]
   [:merchant clojure.string/upper-case]
   [:merchant-city clojure.string/capitalize]
   [:merchant-state clojure.string/upper-case]
   :merchant-zip
   :ref-number
   ;; Type: "c"/"C" -> :credit, "d"/"D" -> :debit. Any other value makes
   ;; `case` throw IllegalArgumentException (no default clause).
   [:type #(case (clojure.string/lower-case %)
             "c" :credit
             "d" :debit)]])
;; Example usage: read the CSV, skip the header row, and turn every
;; remaining line into a map keyed by the cc-headings columns.
(map (partial csv-parse cc-headings)
     (rest (lazy-file-lines "path/to/data.csv")))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment