Created
June 29, 2012 01:33
-
-
Save eklitzke/3015151 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| (ns clojure-crawler.core | |
| (:require [cheshire.core :as cheshire] | |
| [clj-http.client :as client] | |
| [clojure.tools.cli :as cli])) | |
(defn- read-forms
  "Read all of the top-level forms from `file` and return them as a vector.

  `file` is handed to clojure.java.io/reader. Forms are read with
  *read-eval* bound to false, so #= reader forms in crawled sources are
  rejected instead of being evaluated.

  Reading stops at end of input. It also stops, keeping the forms collected
  so far, at the first form the reader fails on."
  [file]
  (with-open [rdr (java.io.PushbackReader.
                   (clojure.java.io/reader file))]
    (binding [*read-eval* false]
      (let [;; Unique sentinel: distinguishes end of input from a literal
            ;; nil or false form in the file.
            eof (Object.)
            ;; Reader failures map to the same sentinel so that a file with a
            ;; broken form still yields the forms that precede it.
            read-next (fn []
                        (try
                          (read rdr false eof)
                          (catch Exception _ eof)))]
        ;; The try lives inside read-next, so the loop itself can use recur
        ;; and the stack stays flat no matter how many forms the file has.
        (loop [forms []]
          (let [form (read-next)]
            (if (identical? form eof)
              forms
              (recur (conj forms form)))))))))
(defn- take-until
  "Return a vector holding the leading elements of seq, ending just before
  the first element for which stop? returns a truthy value."
  [stop? seq]
  (vec (take-while (complement stop?) seq)))
(defn- parse-form
  "Helper for parse-forms.

  `form` is a defn/defmacro form with its head symbol already removed. The
  elements before the first vector or list are treated as the declaration
  (name, then optionally a doc string and/or an attribute map); the rest is
  the implementation. `kind` is stored under :kind in every result.

  Returns a sequence with one map per arity, each with the keys :kind, :name,
  :doc, :args and :attrs. A form whose declaration does not have between one
  and three elements is not understood by this parser and yields an empty
  sequence."
  [form kind]
  (let [is-persistent #(or (instance? clojure.lang.PersistentVector %)
                           (instance? clojure.lang.PersistentList %))
        ;; Everything up to the first vector/list: name, doc string, attr map.
        decl (take-until is-persistent form)
        num-attrs (count decl)
        impl (drop num-attrs form)
        first-impl (first impl)
        parse-templ {:kind kind
                     :name (first decl)
                     :doc ""
                     :args []
                     :attrs {}}
        extras (case num-attrs
                 1 {}
                 ;; A lone second element is either the doc string or the
                 ;; attribute map; tell them apart by type.
                 2 (let [snd (second decl)]
                     (if (instance? String snd)
                       {:doc snd}
                       {:attrs snd}))
                 3 {:doc (second decl)
                    :attrs (nth decl 2)}
                 ;; Without a default clause, case throws on any other count
                 ;; and a single odd form would abort the whole crawl.
                 nil)]
    (if (nil? extras)
      []
      (let [noargs-parse (merge parse-templ extras)
            make-args-parse #(merge noargs-parse {:args %})]
        (if (instance? clojure.lang.PersistentVector first-impl)
          ;; Single arity: the implementation starts with the arg vector.
          [(make-args-parse first-impl)]
          ;; Multiple arities: each implementation element starts with its
          ;; own arg vector.
          (map (comp make-args-parse first) impl))))))
(defn- get-last-definitions-helper
  "Helper for get-last-definitions -- the list of forms should already be
  reversed.

  Walks `remaining` in order and appends to `result` every form whose :name
  is not yet in the set `seen`, recording the name as it goes. Returns the
  accumulated result."
  [seen result remaining]
  (let [step (fn [[known kept] form]
               (let [nm (form :name)]
                 (if (nil? (known nm))
                   [(conj known nm) (conj kept form)]
                   [known kept])))]
    (second (reduce step [seen result] remaining))))
(defn- get-last-definitions
  "Filter a list of forms, keeping only the last instance for each name.

  Clojure allows a defn/defmacro to be redefined, so only the final form
  defined under a given name is of interest.

  The forms seq is reversed before filtering, which forces its evaluation."
  [forms]
  (->> forms
       reverse
       (get-last-definitions-helper #{} [])))
(defn- not-private?
  "Return true unless the parse describes a private definition.

  A definition counts as private when :private is true either in its
  attribute map (the :attrs entry) or in the metadata attached to its name
  symbol, which is where (defn ^:private foo ...) records it."
  [parse]
  (let [;; get returns nil for anything that is not a map, so an unexpected
        ;; :attrs value is treated as \"no attributes\" instead of throwing.
        private? (fn [m] (true? (get m :private)))
        attrs (parse :attrs)
        name-meta (meta (parse :name))]
    (not (or (private? attrs)
             (private? name-meta)))))
(defn parse-forms
  "Parse a sequence of forms and return a map of parse data for the file.

  Only defn and defmacro forms are parsed at the moment; every other form
  is skipped. Parses that not-private? rejects are left out of the result.

  The returned map has the keys :filename, :lang and :parses."
  [forms filename]
  (let [kinds {'defn :defn
               'defmacro :defmacro}
        ;; Returns the parses for one top-level form, or nil when the form is
        ;; not a list headed by one of the supported symbols.
        parse-top-level (fn [form]
                          (when (= (type form) clojure.lang.PersistentList)
                            (when-let [kind (kinds (first form))]
                              (parse-form (rest form) kind))))
        wanted? (fn [parse]
                  (and (not (nil? parse))
                       (not-private? parse)))]
    {:filename filename
     :lang "clojure"
     :parses (->> forms
                  (map parse-top-level)
                  flatten
                  (filter wanted?))}))
(defn index-file
  "Index a file: read its top-level forms and parse the interesting ones.

  `file` must support .toURI; the path component of that URI is recorded as
  the filename in the result."
  [file]
  (let [forms (read-forms file)
        path (-> file .toURI .getPath)]
    (parse-forms forms path)))
(defn- get-matching-files
  "Return a lazy seq of the regular files found anywhere under the given
  directories whose file name matches `pattern` in full (re-matches).

  `directories` is a seq of values accepted by clojure.java.io/file."
  [directories pattern]
  (->> directories
       (map clojure.java.io/file)
       (mapcat file-seq)
       ;; file-seq also yields the directories themselves; one whose name
       ;; happens to match the pattern cannot be opened for reading later.
       (filter (fn [^java.io.File f]
                 (and (.isFile f)
                      (re-matches pattern (.getName f)))))))
;; Disabled: the #_ reader macro discards the whole form below, so this
;; function is never defined.
#_(defn index-directories
    "Index the contents of the dirs listed in the seq 'directories'.

  Each matching file is wrapped in its own agent and index-file is sent to
  it, so files are indexed on multiple threads."
    [directories pattern]
    (let [start-indexing (fn [file] (send (agent file) index-file))
          agents (map start-indexing
                      (get-matching-files directories pattern))]
      (apply await agents)
      (->> agents
           (map deref)
           flatten)))
(defn index-directories-sequentially
  "Index every file under `directories` whose name matches `pattern`, without
  agents (and hence on the calling thread). Returns a lazy seq with one
  index-file result per matching file."
  [directories pattern]
  (for [file (get-matching-files directories pattern)]
    (index-file file)))
(defn upload-results
  "POST each file's parse results to `endpoint`, one request per file.

  `results` is a seq of maps with a :parses entry (as produced by
  parse-forms). For each one, :parses is replaced by its JSON encoding and
  `api-key` is added under :api-key before sending."
  [results endpoint api-key]
  (doseq [file-results results]
    (let [params (merge file-results
                        {:parses (cheshire/generate-string
                                  (file-results :parses))
                         :api-key api-key})]
      ;; clj-http takes the payload from request-map options such as
      ;; :form-params. Passing the fields as the request map itself sends a
      ;; POST that carries none of them.
      ;; NOTE(review): assumes the endpoint expects form-encoded fields --
      ;; confirm against the server.
      (client/post endpoint {:form-params params}))))
(defn -main
  "Command-line entry point.

  Parses the switches, indexes the directories given as positional
  arguments, and uploads the results unless --test is set. Always ends by
  calling System/exit: status 1 when no directories are given, otherwise 0."
  [& argv]
  (let [[options args banner]
        (cli/cli argv
                 ["-h" "--help" "Show help" :default false :flag true]
                 ["--endpoint" :default "http://codesear.ch/api/upload"]
                 ["--api-key" :default nil]
                 ["--test" "Run in test mode (don't upload parses)"
                  :default false :flag true])]
    (when (:help options)
      (println banner)
      (System/exit 0))
    (when-not (seq args)
      (println "No directories specified for indexing!")
      (System/exit 1))
    (let [results (index-directories-sequentially args #"[^.].*\.clj$")
          ;; Counting the parses realizes the lazy results.
          num-results (apply + (map #(count (:parses %)) results))]
      (if-not (:test options)
        (do
          (println (str "Parsed " num-results " forms, uploading..."))
          (upload-results results (:endpoint options) (:api-key options))
          (println "Successfully uploaded results"))
        (println (str "Parsed " num-results " forms, exiting...")))))
  (System/exit 0))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment