Created
September 1, 2017 13:07
-
-
Save borkdude/ddc9433c396b1ff43a091cf2901b9dc7 to your computer and use it in GitHub Desktop.
Clojure text files transducer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns text-xform | |
(:require [clojure.java.io :as io] | |
[clojure.string :as str] | |
[cheshire.core :as json]) | |
(:import [java.io BufferedReader])) | |
;;;; Inspired by https://tech.grammarly.com/blog/building-etl-pipelines-with-clojure
(def db
  "Stand-in for a real database: an atom holding the running count of saved items."
  (atom 0))

(defn save-into-database
  "Pretends to persist `batch`: prints it, then adds its size to the `db`
  counter. Returns the updated counter value (the result of `swap!`)."
  [batch]
  (println batch)
  (let [batch-size (count batch)]
    (swap! db #(+ % batch-size))))
(defn lines-reducible
  "Wraps `rdr` in a reducible (clojure.lang.IReduceInit) that feeds the reader's
  lines, one at a time, to the reducing function `f`, starting from `init`.

  Reduction stops at end of input (`.readLine` returns nil) or as soon as `f`
  returns a `reduced` value; in the latter case the wrapped value is returned.
  The reader is closed in a `finally`, so it is released on normal completion,
  on early termination and when `f` throws. As a consequence the returned
  object can be reduced only once."
  [^BufferedReader rdr]
  (reify clojure.lang.IReduceInit
    (reduce [this f init]
      ;; Diagnostic output: shows the accumulator this reduction starts from.
      (println "init" init)
      (try
        (loop [state init]
          (if (reduced? state)
            ;; Unwrap before returning. `reduce`/`transduce` hand the result of
            ;; `.reduce` straight to their caller (or to the completion step),
            ;; so leaking the Reduced wrapper breaks e.g. `(into [] (take 2) ...)`.
            ;; Inside `cat`/`mapcat` the step fn double-wraps, so removing one
            ;; layer here still lets the outer reduction see the stop signal.
            @state
            (if-let [line (.readLine rdr)]
              (recur (f state line))
              state)))
        (finally (.close rdr))))))
(defn process-with-transducers
  "Runs every line of every file in `files` through a single transducer
  pipeline: open each file as a reducible of lines, decode each line as JSON
  (keys as keywords), group the decoded values into batches of up to 10 and
  hand each batch to `save-into-database`.

  The pipeline is run purely for its side effects: the reducing function
  ignores everything it is given, so the call returns nil."
  [files]
  (let [file->lines (fn [file] (lines-reducible (io/reader file)))
        decode-line (fn [line] (json/decode line true))
        pipeline    (comp
                     (mapcat file->lines)       ;; all lines from all files
                     (map decode-line)          ;; decode line by line
                     (partition-all 10)         ;; make groups of 10
                     (map save-into-database))  ;; sees every group of 10
        ignore-all  (constantly nil)]
    ;; The init value is irrelevant because `ignore-all` discards it.
    (transduce pipeline ignore-all "dude" files)))
;; REPL scratch area: forms inside `comment` are not evaluated when the file is
;; loaded; evaluate the call by hand to run the pipeline over a sample file.
(comment
  (process-with-transducers ["/tmp/foo.json"]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment