Created
March 30, 2016 12:38
-
-
Save tf0054/c6e3edba69e14e9dd50d5db9a27dd36d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env inlein | |
'{:dependencies [[org.clojure/clojure "1.8.0"] | |
[com.hypirion/primes "0.2.1"] | |
[org.clojure/tools.logging "0.2.6"] | |
[org.apache.poi/poi-ooxml "3.13"] | |
]} | |
(require '[com.hypirion.primes :as p]) | |
(:require [clojure.tools.logging :as log] | |
[clojure.string :as string] | |
[clojure.java.io :as io]) | |
(:import | |
(org.apache.poi.ss.usermodel Cell Row Sheet Workbook WorkbookFactory) | |
(org.apache.poi.hssf.record.crypto Biff8EncryptionKey)) | |
;; ## Cells | |
;; I've found it hard to trust the Cell Type and Cell Style for data such as | |
;; integers. In this version of the code I'm converting each cell to STRING | |
;; type before reading it as a string and returning the string value. | |
;; This should be the literal value typed into the cell, except in the case | |
;; of formulae where it should be the result. | |
;; Conversion of the strings to other data types should be done as an | |
;; additional step. | |
(defn get-cell-string-value | |
"Get the value of a cell as a string, by changing the cell type to 'string' | |
and then changing it back." | |
[cell] | |
(let [ct (.getCellType cell) | |
_ (.setCellType cell Cell/CELL_TYPE_STRING) | |
value (.getStringCellValue cell)] | |
(.setCellType cell ct) | |
value)) | |
;; ## Rows | |
;; Rows are made up of cells. We consider the first row to be a header, and | |
;; translate its values into keywords. Then we return each subsequent row | |
;; as a map from keys to cell values. | |
(defn to-keyword | |
"Take a string and return a properly formatted keyword." | |
[s] | |
(-> (or s "") | |
string/trim | |
string/lower-case | |
(string/replace #"\s+" "-") | |
keyword)) | |
;; Note: it would make sense to use the iterator for the row. However that | |
;; iterator just skips blank cells! So instead we use an uglier approach with | |
;; a list comprehension. This relies on the workbook's setMissingCellPolicy | |
;; in `load-workbook`. | |
;; See `incanter-excel` and [http://stackoverflow.com/questions/4929646/how-to-get-an-excel-blank-cell-value-in-apache-poi]() | |
(defn read-row | |
"Read all the cells in a row (including blanks) and return a list of values." | |
[row] | |
(for [i (range 0 (.getLastCellNum row))] | |
(get-cell-string-value (.getCell row (.intValue i))))) | |
;; ## Sheets | |
;; Workbooks are made up of sheets, which are made up of rows. | |
(defn read-sheet | |
"Given a workbook with an optional sheet name (default is 'Sheet1') and | |
and optional header row number (default is '1'), | |
return the data in the sheet as a vector of maps | |
using the headers from the header row as the keys." | |
([workbook] (read-sheet workbook "Sheet1" 1)) | |
([workbook sheet-name] (read-sheet workbook sheet-name 1)) | |
([workbook sheet-name header-row] | |
(log/debugf "Reading sheet '%s'" sheet-name) | |
(let [sheet (.getSheet workbook sheet-name) | |
rows (->> sheet (.iterator) iterator-seq (drop (dec header-row))) | |
headers (map to-keyword (read-row (first rows))) | |
data (map read-row (rest rows))] | |
(log/debugf "Read %d rows" (count rows)) | |
(vec (map (partial zipmap headers) data))))) | |
(defn list-sheets | |
"Return a list of all sheet names." | |
[workbook] | |
(for [i (range (.getNumberOfSheets workbook))] | |
(.getSheetName workbook i))) | |
(defn sheet-headers | |
"Returns the headers (in their original forms, not as keywords) for a given sheet." | |
[workbook sheet-name] | |
(let [sheet (.getSheet workbook sheet-name) | |
rows (->> sheet (.iterator) iterator-seq)] | |
(read-row (first rows)))) | |
;; ## Workbooks | |
;; An `.xlsx` file contains one workbook with one or more sheets. | |
(defn load-workbook | |
"Load a workbook from a string path." | |
[path] | |
(log/debugf "Loading workbook:" path) | |
(doto (WorkbookFactory/create (io/input-stream path)) | |
(.setMissingCellPolicy Row/CREATE_NULL_AS_BLANK))) | |
(defn load-pworkbook | |
"Load a workbook from a string path." | |
[path pass] | |
(log/debugf "Loading workbook:" path "with password") | |
(doto (WorkbookFactory/create (io/input-stream path) pass) | |
(.setMissingCellPolicy Row/CREATE_NULL_AS_BLANK))) | |
(defn save-workbook | |
[path wb] | |
(log/debugf "saving workbook:" path) | |
(.write wb (io/output-stream path))) | |
(when-not (first *command-line-args*) | |
(println "Usage:" (System/getProperty "$0") "prime-number") | |
(System/exit 1)) | |
(-> (first *command-line-args*) | |
(Long/parseLong) | |
(p/get) | |
println) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment