Created
February 11, 2011 18:15
-
-
Save willtim/822769 to your computer and use it in GitHub Desktop.
Simple Clojure API for VTD-XML - much faster than clojure.contrib.zip-filter.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns willtim.clj-vtd-xml | |
(:import [com.ximpleware VTDGen VTDNav AutoPilot]) | |
(:require | |
[clojure.contrib.duck-streams :as ds])) | |
;; | |
;; Clojure API for VTD-XML | |
;; | |
;; Designed to work like clojure.contrib.zip-filter.xml, e.g. | |
;; | |
;;   (for [book   (vtd-> doc :book)
;;         report (vtd-> book :trade :report)]
;;     (vtd-content (vtd1-> report :reportName)))
;; | |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
;; Forward declarations: the navigation helpers below refer to each other
;; before their definitions appear in the file.
(declare vtd-> vtd1-> vtd-xpath vtd-content)
(defn parse-gzip-stream
  "Parses a GZIP XML inputstream into a VTDNav.

  `in` is wrapped in a BufferedInputStream and a GZIPInputStream, read
  fully into a byte array, and closed before parsing starts. Returns the
  VTDNav obtained from the VTDGen after parsing."
  [in]
  ;; The docstring must precede the parameter vector; placed after it, the
  ;; string is just an expression that is evaluated and thrown away.
  (let [ba (with-open [buffered  (java.io.BufferedInputStream. in)
                       ;; Bound separately so `buffered` (and thus `in`) is
                       ;; still closed if the GZIP header check throws.
                       gunzipped (java.util.zip.GZIPInputStream. buffered)]
             (ds/to-byte-array gunzipped))
        vg (doto (VTDGen.)
             (.setDoc ba)
             ;; false: namespace-unaware parsing, per the VTDGen API.
             (.parse false))]
    (.getNav vg)))
(defn vtd->
  "Navigates from `vn` along a path of element names given as keywords.

  The keyword names are joined with \"/\" to form an XPath expression.
  When the current depth of `vn` is 0 the expression is prefixed with
  \"//\"; otherwise it is used as-is. Returns whatever `vtd-xpath`
  returns for that expression."
  [vn & keywords]
  (let [at-depth-zero? (zero? (.getCurrentDepth vn))
        path           (->> keywords
                            (map name)
                            (interpose "/")
                            (apply str))]
    (vtd-xpath vn (if at-depth-zero?
                    (str "//" path)
                    path))))
(defn vtd1->
  "Like `vtd->`, but returns only the first match, or nil when there is
  no match."
  [vn & keywords]
  ;; `first` already yields nil for an empty collection, so no explicit
  ;; emptiness check is needed; `apply` accepts leading fixed arguments,
  ;; so `partial` is unnecessary.
  (first (apply vtd-> vn keywords)))
(defn vtd-xpath
  "Evaluates the XPath expression `xpath` starting from `vn`.

  Works on a clone of `vn`, so the caller's navigator is not moved. Each
  match is returned as a fresh clone of the working navigator taken right
  after the AutoPilot advanced to that match. The first match is computed
  immediately; the remaining ones are produced lazily. Returns an empty
  vector when the expression matches nothing."
  [vn xpath]
  (let [cursor (.cloneNav vn)
        pilot  (doto (AutoPilot. cursor)
                 (.selectXPath xpath))
        ;; Advance the AutoPilot one step; -1 signals no further matches.
        step   (fn step []
                 (if-not (= (.evalXPath pilot) -1)
                   (cons (.cloneNav cursor)
                         (lazy-seq (step)))
                   []))]
    (step)))
(defn vtd-content
  "Returns the normalized text content of the element `vn` points at.

  Returns \"\" when `vn` is nil, and also when the element has no text
  token (`getText` reports -1), instead of passing the -1 index on to
  `toNormalizedString`."
  [vn]
  (if (nil? vn)
    ""
    (let [text-index (.getText vn)]
      ;; -1 means there is no text token for the current element.
      (if (= text-index -1)
        ""
        (.toNormalizedString vn text-index)))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment