Created
January 18, 2025 13:08
-
-
Save geraldodev/3a4808a95b260612bc3c39d5225ab980 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; This was generated with chat.deepseeker.com to solve a problem: I'm reading Excel tables
;; in Clojure with the DuckDB JDBC driver, which has no way to enumerate Excel worksheets.
;; The plus side of this code is that it has no dependencies.
(ns util.enum-worksheet | |
(:import | |
(java.util Enumeration) | |
(java.util.zip ZipFile ZipEntry) | |
(java.io InputStream) | |
(javax.xml.parsers DocumentBuilderFactory DocumentBuilder) | |
(org.w3c.dom Document NodeList Node NamedNodeMap))) | |
;; Ask the compiler to warn whenever a Java interop call cannot be resolved at
;; compile time; the type hints in this file exist to avoid such reflective calls.
(set! *warn-on-reflection* true) | |
(defn ^:private find-entry
  "Walks `entries` (an Enumeration of ZipEntry) and returns the first entry
  whose name equals `name`, or nil when no entry matches."
  [^Enumeration entries name]
  (loop []
    (when (.hasMoreElements entries)
      (let [^ZipEntry candidate (.nextElement entries)]
        (if (= (.getName candidate) name)
          candidate
          (recur))))))
(defn get-sheet-names
  "Returns a sequence of the worksheet names declared in the Excel (.xlsx)
  workbook at `file-path`, in the order they appear in xl/workbook.xml.

  Both the Transitional and the Strict SpreadsheetML namespaces are recognised.
  The result is fully realised before the archive is closed.

  Returns nil when the archive contains no xl/workbook.xml entry. Throws
  java.io.IOException (or its subclass java.util.zip.ZipException) when the
  file cannot be opened as a ZIP archive, and org.xml.sax.SAXException when
  the workbook XML is malformed or contains a DOCTYPE declaration."
  [^String file-path]
  (with-open [^ZipFile zip-file (ZipFile. file-path)]
    ;; ZipFile/getEntry looks the entry up directly, so there is no need to
    ;; enumerate every entry in the archive.
    (when-let [^ZipEntry workbook-entry (.getEntry zip-file "xl/workbook.xml")]
      (with-open [^InputStream input-stream (.getInputStream zip-file workbook-entry)]
        (let [^DocumentBuilderFactory dbf (DocumentBuilderFactory/newInstance)
              _ (.setNamespaceAware dbf true)
              ;; A well-formed workbook.xml never carries a DOCTYPE; rejecting it
              ;; outright blocks XXE / entity-expansion attacks from crafted files.
              _ (.setFeature dbf "http://apache.org/xml/features/disallow-doctype-decl" true)
              ^DocumentBuilder db (.newDocumentBuilder dbf)
              ^Document doc (.parse db input-stream)
              ;; A workbook uses exactly one of these namespaces: the first is
              ;; Transitional OOXML, the second is Strict OOXML.
              spreadsheet-namespaces ["http://schemas.openxmlformats.org/spreadsheetml/2006/main"
                                      "http://purl.oclc.org/ooxml/spreadsheetml/main"]]
          ;; doall: realise the names while the document and archive are still open
          ;; rather than handing a lazy seq out of the with-open scopes.
          (doall
           (for [^String ns-uri spreadsheet-namespaces
                 :let [^NodeList sheet-nodes (.getElementsByTagNameNS doc ns-uri "sheet")]
                 i (range (.getLength sheet-nodes))
                 :let [^Node sheet-node (.item sheet-nodes i)
                       ^NamedNodeMap attributes (.getAttributes sheet-node)
                       ^Node name-attr (.getNamedItem attributes "name")]
                 ;; Skip <sheet> elements that have no name attribute.
                 :when name-attr]
             (.getNodeValue name-attr))))))))
;; REPL scratchpad: the forms inside `comment` are never evaluated when the
;; file is loaded; evaluate them by hand from an editor-connected REPL.
(comment | |
;; Print the last exception caught by the REPL.
(prn *e) | |
;; List the worksheet names of a workbook in the current working directory.
(get-sheet-names "file.xlsx") | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment