Created
June 21, 2018 08:27
-
-
Save lnostdal/bf83f3fcf07ec5f625819c82899443e9 to your computer and use it in GitHub Desktop.
Dealing with big files from Clojure via GC finalize and laziness via map/line-seq.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ;; https://www.Quanto.ga/ | |
| ;; NOTE: The normal and sane(!) way to do this would be to use an outer WITH-OPEN | |
| ;; – or even better put your stuff in a database! | |
;; Clojure WITH-OPEN macro (sort of?) on drugs. Bonus: It will work in context of laziness.
;;
;; Wraps a closeable `resource` and closes it from FINALIZE, i.e. when the JVM garbage
;; collector finalizes the wrapper. Keep a reference to the wrapper (not just to the inner
;; resource) for as long as the resource is in use.
;;
;; NOTE: Finalization is best-effort: the JVM gives no guarantee about when (or whether)
;; FINALIZE runs, and Object.finalize is deprecated since Java 9.
(deftype GCedResource
    [^java.lang.AutoCloseable resource] ;; Type hint only avoids a reflective `.close`; the field is still stored as Object.
  Object
  (finalize [o]
    #_(info "[GCedResource, finalize]:" resource)
    ;; Guard against a nil resource so the finalizer doesn't throw a pointless NPE.
    (when (some? resource)
      (.close resource))))
(defn histdata-open-file
  "Opens the gzip-compressed file named by `filename` for reading.

  Returns: A reader (file resource) wrapped in GCedResource.

  Throws: java.io.FileNotFoundException if the file can't be opened, or whatever the
  gzip/reader setup throws (in which case the underlying file stream is closed first)."
  [^String filename]
  (let [^java.io.InputStream raw (clojure.java.io/input-stream
                                  (clojure.java.io/file filename))]
    (try
      (GCedResource.
       (clojure.java.io/reader
        (java.util.zip.GZIPInputStream. raw)))
      (catch Throwable t
        ;; The GZIP header is read in the constructor; if that (or the reader setup) fails,
        ;; nothing else owns `raw` yet, so close it here instead of leaking the descriptor.
        (.close raw)
        (throw t)))))
(defn -histdata-import-ticks
  "Parses the lines of an opened histdata file into Tick instances.

  `file`: Return value from HISTDATA-OPEN-FILE.
  `msymbol`: Passed through unchanged as the last argument of the Tick constructor.

  Each line is split on `,` into datetime, bid, ask and volume. The datetime is parsed
  with the pattern `YYYYMMdd HHmmssSSS`; bid, ask and volume are parsed as doubles, and a
  volume of zero is replaced by NaN.

  Returns: A lazy seq with one Tick per line. Note that LINE-SEQ reads the first line as
  soon as this function is called; the remaining lines are read on demand."
  [^GCedResource file ^String msymbol]
  (let [dt-format (clj-time.format/formatter "YYYYMMdd HHmmssSSS")]
    ;; `(repeat file)` is mapped alongside the lines so that the unrealized part of the
    ;; returned seq keeps referencing the GCedResource wrapper; the intent is that the
    ;; reader isn't finalized (closed) while lines remain to be read.
    (map (fn [^String line held-resource] ;; HACK: `held-resource` might be optimized away by the JIT? Or?
           (let [[datetime-str bid ask volume] (str/split line #",")
                 bid (Double/parseDouble bid), ask (Double/parseDouble ask)
                 volume (Double/parseDouble volume)
                 datetime (clj-time.format/parse dt-format datetime-str)]
             (Tick. bid ask (if (zero? volume) ##NaN volume) datetime (time/epoch)
                    :unknown msymbol)))
         (line-seq (.resource file))
         (repeat file))))
(defn histdata-tick-seq
  "Returns a lazy seq of Tick for `msymbol`, concatenated from the monthly histdata files
  for 2000-01 through 2017-12 in chronological order.

  Files are looked up as
  `<base>/<lower-cased msymbol>/DAT_ASCII_<file symbol>_T_<YYYYMM>.csv.gz`, where the file
  symbol is `msymbol` itself unless a different name is listed in the mapping below.
  Months whose file does not exist are skipped.

  NOTE: Files are opened lazily via HISTDATA-OPEN-FILE, but since Clojure's lazy seqs can
  be chunked, several files may get opened ahead of the one currently being consumed."
  [^String msymbol] ;; TODO: Add RSEQ variant?
  (let [;; Symbols whose file name differs from `msymbol`; anything else falls back to
        ;; `msymbol` instead of yielding nil (which produced a bogus `DAT_ASCII__T_...` path).
        filename-msymbol (get {"SP500" "SPXUSD"} msymbol msymbol)
        filename-dates (mapcat (fn [year]
                                 (map (fn [month] (str year (format "%02d" month)))
                                      (range 1 13)))
                               (range 2000 2018))
        filenames (map (fn [filename-date]
                         (str "/home/lnostdal/clojure/quantataraxia/resources/histdata.com/" (str/lower-case msymbol)
                              "/DAT_ASCII_" filename-msymbol "_T_" filename-date ".csv.gz"))
                       filename-dates)
        files (mapcat (fn [filename]
                        (try
                          [(histdata-open-file filename)]
                          (catch java.io.FileNotFoundException _
                            ;; No data for this month; skip it.
                            [])))
                      filenames)]
    ;; No FileNotFoundException handling is needed here: every element of `files` has
    ;; already been opened successfully by the time it is parsed.
    (mapcat (fn [file]
              (-histdata-import-ticks file msymbol))
            files)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment