Created
September 27, 2018 22:24
-
-
Save favila/669c85a6703b454604a140fc962deafd to your computer and use it in GitHub Desktop.
Create a Clojure data-structure representation of a PDF document (loaded via PDFBox), purely for visibility into the PDF's internal structure.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Load THE-FILE with PDFBox and return a plain Clojure data structure that
;; mirrors the document's COS object graph, for inspection at the REPL.
;;
;; Shape of the result:
;;   [:pdf.type/doc <document-id-or-nil> [[<object-ref> <object-value>] ...]]
;; where
;;   <object-ref>  is [:pdf.type/object <object-number> <generation-number>]
;;   arrays        are [:pdf.type/array [...]]
;;   dictionaries  are Clojure maps (keys and values both converted)
;;   names         are keywords in the "pdf.name" namespace
;;   strings       are [:pdf.type/string "..."]
;;   streams       are [:pdf.type/stream <length>]
;;   booleans/numbers/null are the corresponding Clojure scalars
;;   non-direct values are wrapped as [:pdf.type/indirect <value>], except
;;   for booleans, numbers, names and null, which are emitted bare.
;;
;; Everything is built eagerly (mapv / into), so nothing lazy escapes the
;; with-open scope after the document has been closed.
(with-open [pdf-document (PDDocument/load (clojure.java.io/file THE-FILE))]
  (let [doc    (.getDocument pdf-document)

        ;; Reference to an object by number/generation; the object itself is
        ;; NOT traversed here, which keeps reference cycles from recursing.
        getobj (fn [^COSObject x]
                 [:pdf.type/object (.getObjectNumber x) (.getGenerationNumber x)])

        ;; Convert one COS value that appears inside an array or dictionary.
        visit  (fn [^COSBase x ^ICOSVisitor vis]
                 (cond
                   ;; Defensive guard: a missing element is rendered as nil
                   ;; instead of throwing on the .isDirect call below.
                   (nil? x)
                   nil

                   (.isDirect x)
                   (if (instance? COSObject x)
                     (getobj x)
                     (.accept x vis))

                   (instance? COSObject x)
                   [:pdf.type/indirect (getobj x)]

                   ;; Scalars are emitted bare even when not flagged direct.
                   (or (instance? COSBoolean x)
                       (instance? COSNumber x)
                       (instance? COSName x)
                       (instance? COSNull x))
                   (.accept x vis)

                   :else
                   [:pdf.type/indirect
                    ;; GetKey will never work because of identity
                    (.accept x vis)]))]
    (.accept doc
             ^ICOSVisitor
             (reify
               ICOSVisitor
               (visitFromArray [vis x]
                 [:pdf.type/array
                  (mapv (fn [item] (visit item vis))
                        (iterator-seq (.iterator x)))])

               (visitFromBoolean [vis x]
                 (.getValue x))

               (visitFromDictionary [vis x]
                 (into {}
                       (map (fn [entry]
                              [(visit (key entry) vis)
                               (visit (val entry) vis)]))
                       (.entrySet x)))

               (visitFromDocument [vis x]
                 [:pdf.type/doc
                  ;; The trailer's /ID entry is optional, so getDocumentID
                  ;; can return null; emit nil rather than throwing an NPE.
                  (some-> (.getDocumentID x) (.accept vis))
                  (into []
                        (map (fn [^COSObject obj]
                               [(getobj obj)
                                ;; An unresolved/dangling object has no
                                ;; underlying value; render it as nil.
                                (some-> (.getObject obj) (.accept vis))]))
                        (.getObjects x))])

               (visitFromFloat [vis x]
                 (.doubleValue x))

               (visitFromInt [vis x]
                 (.longValue x))

               (visitFromName [vis x]
                 (keyword "pdf.name" (.getName x)))

               (visitFromNull [vis x]
                 nil)

               ;; Only the stream length is reported; the text preview is
               ;; intentionally disabled via the #_ reader discard.
               (visitFromStream [vis x]
                 [:pdf.type/stream
                  (.getLength x)
                  #_(subs (.toTextString x) 0 100)])

               (visitFromString [vis x]
                 [:pdf.type/string (.getString x)])))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment