Created
May 9, 2012 20:55
-
-
Save djKianoosh/2648751 to your computer and use it in GitHub Desktop.
Some Clojure functions to help read custom access log files into maps
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defn comment?
  "Returns true when `s` is a string that starts with `#`, false otherwise.

  Non-string input (including nil) yields false instead of throwing."
  [s]
  ;; The ^String hint lets the compiler resolve startsWith statically; without
  ;; it the call is resolved by reflection every time it runs.
  (and (string? s)
       (.startsWith ^String s "#")))
(defn not-comment?
  "Logical negation of `comment?`: true when `s` is not a comment."
  [s]
  (-> s comment? not))
(defn remove-comments
  "Returns a lazy seq of the items in `line` for which `comment?` is false.

  Despite its name, `line` is treated as a collection of strings (it is
  passed straight to a sequence function)."
  [line]
  (remove comment? line))
(defn nil-if-hyphen
  "Returns nil when `s` equals \"-\", otherwise returns `s` unchanged."
  [s]
  (when-not (= s "-")
    s))
(defn str->int
  "Returns a number if the string consists solely of decimal digits
  (optionally followed by a fractional part), otherwise returns the input
  unaltered.

  - \"5\", \"200\", \"010\" -> longs (5, 200, 10); values too large for a
    long fall back to BigInt.
  - \"1.5\"                 -> double.
  - anything else, including nil and non-strings -> returned as-is."
  [s]
  (cond
    ;; Nothing to parse; hand back whatever we were given.
    (not (string? s)) s

    ;; One or more digits. The earlier pattern required at least two
    ;; characters, so single-digit fields were silently left as strings.
    (re-matches #"\d+" s)
    (try
      ;; Parse explicitly as base 10: the Clojure reader interprets a leading
      ;; zero as octal (\"010\" -> 8) and rejects \"08\" with an exception.
      (Long/parseLong s)
      (catch NumberFormatException _
        (bigint s)))

    ;; Digits, a dot, digits.
    (re-matches #"\d+\.\d+" s)
    (Double/parseDouble s)

    :else s))
;; Example format line:
;; #Fields: date time s-ip cs-method cs-uri-stem cs-uri-query s-port cs-username c-ip cs(User-Agent) sc-status sc-substatus sc-win32-status time-taken
(defn is-format-line?
  "Returns true when `s` is a string beginning with the `#Fields:` directive,
  false otherwise (including for nil, empty and very short lines)."
  [s]
  ;; startsWith copes with inputs shorter than the prefix, whereas
  ;; (.substring s 0 8) throws StringIndexOutOfBoundsException on them.
  (and (string? s)
       (.startsWith ^String s "#Fields:")))
(defn find-first-format-line
  "Returns the first element of `lines` satisfying `is-format-line?`,
  or nil when there is none."
  [lines]
  (->> lines
       (filter is-format-line?)
       first))
(defn read-format-into-keywords
  "Splits `s` on single spaces, discards every token that is a comment
  (per `not-comment?`), and returns the remaining tokens as a lazy seq of
  keywords."
  [s]
  (for [token (clojure.string/split s #" ")
        :when (not-comment? token)]
    (keyword token)))
(defn read-format-from-file
  "Opens the file at `f`, locates its first format line via
  `find-first-format-line`, and returns that line's column names as a
  fully realized seq of keywords (see `read-format-into-keywords`).

  The underlying reader is closed before this function returns."
  [f]
  (with-open [rdr (clojure.java.io/reader (clojure.java.io/file f))]
    ;; doall realizes the result while the reader is still open, so nothing
    ;; lazy can reach back into a closed stream after we return.
    (doall
      (read-format-into-keywords
        (find-first-format-line (line-seq rdr))))))
(defn zipmap-line-data
  "Builds a map from `columns` to the fields of a single log `line`.

  The line is tokenized into runs of characters that are neither a space nor
  a single quote, or into single-quoted segments (quotes retained). Each
  token goes through `str->int` before being paired, in order, with the
  corresponding entry of `columns`."
  [columns line]
  (->> line
       (re-seq #"[^ ']+|'[^']*'")
       (map str->int)
       (zipmap columns)))
(defn read-data-from-file
  "Returns a lazy seq of maps, one per non-comment line of `file`, keyed by
  the columns obtained from `read-format-from-file`.

  The file is opened twice: once to discover the columns and once to stream
  the data lines. The second reader is not explicitly closed here, since the
  returned seq reads from it lazily."
  [file]
  (let [columns  (read-format-from-file file)
        row->map (fn [row] (zipmap-line-data columns row))]
    (->> (clojure.java.io/file file)
         clojure.java.io/reader
         line-seq
         remove-comments
         (map row->map))))
;; Projects a parsed log entry onto a vector of
;; [:time :c-ip :time-taken :cs-uri] values, in that order.
;; NOTE(review): the example format line in this file lists cs-uri-stem and
;; cs-uri-query but no cs-uri, so that slot may always be nil -- confirm
;; against the actual log format.
(def summarize
  (apply juxt [:time :c-ip :time-taken :cs-uri]))
;;; Example usage

;; Parse every data line of the log into a map.
(def data (read-data-from-file "path/to/file.log"))

;; The 200 entries with the greatest :time-taken, largest first.
(def longest200
  (->> data
       (sort-by :time-taken)
       reverse
       (take 200)))

;; Pretty-print the sorted summaries of those entries.
(->> longest200
     (map summarize)
     sort
     pprint)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment