Created
June 21, 2012 22:40
-
-
Save eklitzke/2969052 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| (ns cs.crawler) | |
(defn- read-forms
  "Read every top-level form from `file` and return them, in order, as a vector.

  `file` is anything clojure.java.io/reader accepts (a path string, a
  java.io.File, a Reader, ...).  Forms are read with *read-eval* bound to
  false, so #= reader-eval forms are refused instead of being executed.

  Reading is best-effort: if the reader throws part-way through (malformed
  syntax, a refused #= form, ...) the forms read before the failure are
  returned.  Failure to open the file is not caught here."
  [file]
  (with-open [rdr (java.io.PushbackReader.
                   (clojure.java.io/reader file))]
    (binding [*read-eval* false]
      (let [;; unique sentinel: cannot collide with anything the reader returns
            eof (Object.)
            ;; `recur` may not cross a `try`, so the guarded read gets its own fn;
            ;; a reader error is treated like end of input (best-effort read)
            read-one (fn []
                       (try
                         (read rdr false eof)
                         (catch Exception _ eof)))]
        ;; iterate instead of recursing once per form, so files with many
        ;; top-level forms cannot overflow the stack
        (loop [forms []]
          (let [form (read-one)]
            (if (identical? form eof)
              forms
              (recur (conj forms form)))))))))
(defn- take-until
  "Collect the leading elements of seq into a vector, stopping right after
  the first element that satisfies stop?.

  The element that satisfied stop? is included as the last item of the
  result.  When no element satisfies stop?, the result holds every element
  of seq."
  [stop? seq]
  (loop [collected []
         remaining seq]
    (cond
      ;; input exhausted without a match: everything was taken
      (empty? remaining)
      collected

      ;; first match: keep it and stop
      (stop? (first remaining))
      (conj collected (first remaining))

      :else
      (recur (conj collected (first remaining)) (rest remaining)))))
(defn- parse-form
  "Helper for parse-forms: describe a single defn/defmacro form.

  form is the form with its leading defn/defmacro symbol already removed,
  i.e. (name docstring? attr-map? params-or-arities ...).  kind is stored
  unchanged under :kind.

  Returns a map with :kind, :name (the first element of form), :doc (the
  docstring, or the empty string) and :attrs (the attribute map, or {}).

  Throws IllegalArgumentException (no matching `case` clause) when zero or
  more than three elements precede the first vector/list in form."
  [form kind]
  (let [;; a vector (params) or list (arity body) ends the declaration part;
        ;; list? is used because the empty list () is not an instance of
        ;; clojure.lang.PersistentList
        body-start? #(or (vector? %) (list? %))
        decl-attrs (take-while (complement body-start?) form)
        parse-templ {:kind kind
                     :name (first decl-attrs)
                     :doc ""
                     :attrs {}}]
    (merge parse-templ
           (case (count decl-attrs)
             ;; name only
             1 {}
             ;; name plus either a docstring or an attribute map
             2 (let [snd (second decl-attrs)]
                 (if (string? snd)
                   {:doc snd}
                   {:attrs snd}))
             ;; name, docstring, attribute map
             3 {:doc (second decl-attrs)
                :attrs (nth decl-attrs 2)}
             ; we purposely omit a default case, to detect bad parses
             ))))
(defn parse-forms
  "Lazily parse a sequence of forms, and return maps of parse data.

  Currently only defn and defmacro forms are parsed; every other form is
  skipped.  This is easily extended -- for now, we have the restriction
  because those are the only forms we are interested in.

  forms is a sequence of already-read top-level forms; filename is recorded
  under :file in every returned map.  The result is a lazy seq, so an
  exception from parsing a form is thrown when that element is realized,
  not when parse-forms is called."
  [forms filename]
  (->> forms
       (keep (fn [form]
               ;; Only lists can be defn/defmacro forms.  Top-level symbols,
               ;; numbers, keywords etc. cannot be destructured and would
               ;; throw UnsupportedOperationException, so skip them up front.
               (when (seq? form)
                 (case (first form)
                   defn (parse-form (rest form) :defn)
                   defmacro (parse-form (rest form) :defmacro)
                   nil))))
       (map (partial merge {:file filename}))))
(defn index-file
  "Index a file, by parsing its top level and looking for interesting forms.

  file is a java.io.File.  Returns a fully realized seq of parse maps (see
  parse-forms), each tagged with the path of the file's URI under :file.

  If reading or parsing throws an Exception, a message naming the file and
  the exception is printed to *err* and nil is returned."
  [file]
  (try
    ;; parse-forms is lazy: realize it here so that parse errors are raised
    ;; inside this try instead of later, in whoever consumes the result
    (doall (parse-forms (read-forms file) (.getPath (.toURI file))))
    ;; a single handler: UnsupportedOperationException is itself an Exception,
    ;; so a separate clause after this one could never be reached
    (catch Exception e
      (binding [*out* *err*]
        (println (str "Failed to index " file ": " e)))
      nil)))
(defn- get-matching-files
  "Walk each entry of directories with file-seq and return, lazily, the
  java.io.File objects whose name (final path component) matches pattern
  in its entirety (re-matches)."
  [directories pattern]
  (->> directories
       (map clojure.java.io/file)
       (mapcat file-seq)
       (filter (fn [candidate]
                 (re-matches pattern (.getName candidate))))))
(defn index-directories
  "Index the contents of the dirs listed in the seq 'directories'.

  Every file under those directories whose name matches pattern is sent to
  its own agent, which runs index-file on it; this function then waits for
  all agents and returns one flat, lazy seq of the parse maps from all files.

  Files for which index-file produced nil contribute nothing to the result."
  [directories pattern]
  (let [;; doall: dispatch every file now rather than relying on a later
        ;; consumer to realize the lazy seq of agents
        agents (doall (map #(send (agent %) index-file)
                           (get-matching-files directories pattern)))]
    (apply await agents)
    ;; mapcat rather than flatten: flatten would keep a nil per failed file
    (mapcat deref agents)))
(defn index-directories-sequentially
  "Like index-directories, but doesn't use agents (and hence runs in a single
  thread).

  Returns one flat, lazy seq of the parse maps from every matching file, so
  counting the result counts forms rather than files.  Files for which
  index-file produced nil contribute nothing."
  [directories pattern]
  ;; mapcat concatenates the per-file results; plain map would return one
  ;; nested seq per file
  (mapcat index-file (get-matching-files directories pattern)))
(defn -main
  "Command-line entry point.  Every argument is a directory to index.

  With no arguments, prints a complaint and exits with status 1.  Otherwise
  indexes the files under the given directories whose names match the
  pattern below, prints the number of results together with the elapsed
  time, and exits with status 0."
  [& argv]
  (if (seq argv)
    (do
      (time
       (let [all-forms (index-directories-sequentially argv #"[^.].*\.clj$")
             ;; counting realizes the lazy result inside the timed region
             total (count all-forms)]
         (println (str "Processed " total " defn/defmacro forms"))))
      (System/exit 0))
    (do
      (println "No directories specified for indexing!")
      (System/exit 1))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment