Skip to content

Instantly share code, notes, and snippets.

@eklitzke
Created June 21, 2012 22:40
Show Gist options
  • Select an option

  • Save eklitzke/2969052 to your computer and use it in GitHub Desktop.

Select an option

Save eklitzke/2969052 to your computer and use it in GitHub Desktop.
(ns cs.crawler)
(defn- read-forms
  "Read all of the top-level forms from a file and return them as a vector.

  `file` is anything accepted by clojure.java.io/reader. Reading is done
  with *read-eval* bound to false, so #= reader-eval forms in the file are
  rejected instead of being evaluated.

  Reading is best-effort: it stops at end of file or at the first form the
  reader throws on, and returns every form read up to that point."
  [file]
  (let [eof (Object.)
        ;; A reader exception ends the read exactly like EOF does, so both
        ;; cases are folded into the same sentinel value.
        next-form (fn [rdr]
                    (try
                      (read rdr false eof)
                      (catch Exception _ eof)))]
    (with-open [rdr (java.io.PushbackReader.
                     (clojure.java.io/reader file))]
      (binding [*read-eval* false]
        ;; loop/recur keeps the stack depth constant regardless of how many
        ;; forms the file contains (a self-call from inside `try` is not a
        ;; tail call and would grow the stack once per form).
        (loop [forms []]
          (let [form (next-form rdr)]
            (if (identical? form eof)
              forms
              (recur (conj forms form)))))))))
(defn- take-until
  "Eagerly collect elements of seq into a vector, up to and including the
  first element that satisfies stop?.

  The matching element, if any, is the last item of the result. When no
  element satisfies stop?, the result holds every element of seq."
  [stop? seq]
  (loop [taken []
         remaining seq]
    (cond
      ;; ran out of input without ever hitting the stop condition
      (empty? remaining) taken
      ;; include the stopping element itself, then finish
      (stop? (first remaining)) (conj taken (first remaining))
      :else (recur (conj taken (first remaining)) (rest remaining)))))
(defn- parse-form
  "Helper for parse-forms.

  `form` is the tail of a defn/defmacro form (everything after the defining
  symbol) and `kind` is the tag stored under :kind in the result.

  The declaration part of the form is every element that precedes the first
  PersistentVector or PersistentList (the argument vector, or the first
  arity of a multi-arity definition). It is expected to be one of:
    name
    name docstring
    name attr-map
    name docstring attr-map

  Returns a map with the keys :kind, :name, :doc (\"\" when absent) and
  :attrs ({} when absent). Any other declaration length makes `case` throw
  IllegalArgumentException, which is how bad parses are surfaced."
  [form kind]
  (let [body-start? #(or (instance? clojure.lang.PersistentVector %)
                         (instance? clojure.lang.PersistentList %))
        decl-attrs (take-while (complement body-start?) form)
        [name-sym snd thd] decl-attrs
        defaults {:kind kind
                  :name name-sym
                  :doc ""
                  :attrs {}}]
    (merge defaults
           (case (count decl-attrs)
             1 {}
             ;; with a single extra element, a string is the docstring and
             ;; anything else is taken to be the attribute map
             2 (if (instance? String snd)
                 {:doc snd}
                 {:attrs snd})
             3 {:doc snd
                :attrs thd}
             ; we purposely omit a default case, to detect bad parses
             ))))
(defn parse-forms
  "Lazily parse a sequence of forms, and return maps of parse data.

  Only forms whose first element is the symbol defn or defmacro are parsed
  (via parse-form); every other form is skipped, including top-level forms
  that are not sequential at all (keywords, numbers, symbols, maps, ...).
  Each resulting map also carries `filename` under :file.

  Currently we only allow the parsing of defn and defmacro forms. This
  is easily extended -- for now, we have the restriction because those
  are the only forms we are interested in."
  [forms filename]
  (let [parse-one (fn [form]
                    ;; Guard before taking the form apart: destructuring a
                    ;; non-sequential value such as a keyword or a number
                    ;; throws UnsupportedOperationException.
                    (when (sequential? form)
                      (case (first form)
                        defn (parse-form (next form) :defn)
                        defmacro (parse-form (next form) :defmacro)
                        nil)))]
    (map (partial merge {:file filename})
         (keep parse-one forms))))
(defn index-file
  "Index a file, by parsing its top level and looking for interesting forms.

  `file` must support .toURI (e.g. a java.io.File); the path of that URI is
  passed to parse-forms as the file name.

  Returns the fully realized seq of parse maps produced by parse-forms. If
  reading or parsing throws an Exception, a short message is printed and
  nil is returned instead."
  [file]
  (try
    ;; parse-forms is lazy; force it here so that parse errors are raised
    ;; inside this try rather than later, wherever the seq gets consumed.
    (doall (parse-forms (read-forms file) (.getPath (.toURI file))))
    ;; The specific handler has to come first: catch clauses are tried in
    ;; order, and UnsupportedOperationException is itself an Exception.
    (catch java.lang.UnsupportedOperationException e (println "uhoh"))
    (catch Exception e (println "oh shit"))))
(defn- get-matching-files
  "Lazily list the java.io.File objects found by walking each entry of
  `directories` with file-seq, keeping those whose name (the last path
  component, as returned by .getName) is fully matched by the regex
  `pattern`."
  [directories pattern]
  (for [dir directories
        candidate (file-seq (clojure.java.io/file dir))
        :when (re-matches pattern (.getName candidate))]
    candidate))
(defn index-directories
  "Index every file under `directories` whose name matches `pattern`.

  Each matching file is wrapped in its own agent and index-file is sent to
  that agent, so files are indexed on the agent thread pool. This function
  blocks until all of the agents have finished, then returns the flattened
  contents of their final states."
  [directories pattern]
  (let [start-indexing (fn [file] (send (agent file) index-file))
        workers (map start-indexing
                     (get-matching-files directories pattern))]
    ;; wait for every agent before reading any of the results
    (apply await workers)
    (->> workers
         (map deref)
         flatten)))
(defn index-directories-sequentially
  "Agent-free counterpart of index-directories: index-file is applied to
  each matching file on the thread that consumes the result.

  Returns a lazy seq holding one index-file result per matching file."
  [directories pattern]
  (->> (get-matching-files directories pattern)
       (map index-file)))
(defn -main
  "Command-line entry point. Every argument is a directory to index.

  Exits with status 1 when no directory is given. Otherwise indexes all
  non-dot-prefixed .clj files sequentially, prints how many defn/defmacro
  forms were found together with the elapsed time, and exits with status 0."
  [& argv]
  (when (empty? argv)
    (println "No directories specified for indexing!")
    (System/exit 1))
  (time (let [per-file (index-directories-sequentially argv #"[^.].*\.clj$")
              ;; The indexer yields one result per file, and that result is
              ;; nil for a file that failed to index. Flatten and drop the
              ;; nils so the number reported is the count of parsed forms
              ;; rather than the count of files.
              all-forms (remove nil? (flatten per-file))]
          ; (println all-forms)
          (println (str "Processed " (count all-forms)
                        " defn/defmacro forms"))))
  (System/exit 0))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment