Created
October 22, 2014 19:09
-
-
Save llasram/e75c6bbd9b8567e96681 to your computer and use it in GitHub Desktop.
Parse giant JSON objects as a reducer of key-value pairs.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns repubsub.flat-json
  "Parse giant JSON objects as a reducer of key-value pairs."
  (:require [clojure.core.protocols :as ccp]
            [clojure.java.io :as io]
            [cheshire (core :as json) (parse :as parse) (factory :as factory)])
  (:import [com.fasterxml.jackson.core
            , JsonParser JsonFactory JsonFactory$Feature JsonGenerator$Feature
            , JsonToken]
           [java.io
            , StringWriter StringReader BufferedReader BufferedWriter
            , ByteArrayOutputStream PushbackReader]))
(defn reducer
  "Returns a reducible (`clojure.core.protocols/CollReduce`) view of the JSON
  document readable from `uri` (anything `clojure.java.io/reader` accepts).

  The document's top-level value must be a JSON object. Reducing calls
  `(f acc [key val])` once per top-level member, in document order, where
  `key` is the member name as a string and `val` is the member value as
  parsed by `cheshire.parse/parse*` (keys of nested objects are left as
  strings). Members are parsed one at a time from the stream rather than
  parsing the whole top-level object up front.

  With no `init`, the initial accumulator is `(f)`. An empty input (no
  tokens at all) yields the initial value. A `reduced` accumulator stops
  the reduction early. The reader and parser are opened per reduction and
  closed when it finishes or throws.

  Throws `ex-info` with `{:token t}` when the first token is not the start
  of an object."
  [uri]
  (reify
    ccp/CollReduce
    (coll-reduce [this f] (ccp/coll-reduce this f (f)))
    (coll-reduce [this f init]
      (let [^JsonFactory shared (or factory/*json-factory* factory/json-factory)
            ;; Configure a private copy: calling `.configure` on the shared
            ;; factory would switch field-name interning off for every other
            ;; user of that factory in the process.
            ^JsonFactory fac (doto (.copy shared)
                               (.configure JsonFactory$Feature/INTERN_FIELD_NAMES
                                           false))]
        (with-open [^java.io.Reader rdr (io/reader uri)
                    ;; `createParser` replaces the deprecated `createJsonParser`.
                    ^JsonParser jp (.createParser fac rdr)]
          (let [t (.nextToken jp)]
            (cond
              ;; No tokens at all: nothing to reduce over.
              (nil? t) init

              (not (identical? JsonToken/START_OBJECT t))
              (throw (ex-info "Not start of object" {:token t}))

              :else
              ;; Each iteration starts with the parser on a member name
              ;; (or on the END_OBJECT that closes the top-level object).
              (loop [acc init, t (.nextToken jp)]
                (if (identical? JsonToken/END_OBJECT t)
                  acc
                  (let [key (.getText jp)
                        ;; Advance to the first token of the member's value,
                        ;; which is where `parse*` is invoked here.
                        _ (.nextToken jp)
                        val (parse/parse* jp identity false nil)
                        acc (f acc [key val])]
                    (if (reduced? acc)
                      @acc
                      (recur acc (.nextToken jp)))))))))))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment