Created
January 16, 2011 22:32
-
-
Save budu/782221 to your computer and use it in GitHub Desktop.
Prototype a new Marginalia parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(use '(clojure.contrib [reflect :only [get-field]]) | |
'(clojure [string :only [join replace]])) | |
(deftype Comment [content]) | |
(defmethod print-method Comment [comment ^String out] | |
(.write out (str \" (.content comment) \"))) | |
(defn read-comment [reader semicolon] | |
(let [sb (StringBuilder.)] | |
(.append sb semicolon) | |
(loop [ch (char (.read reader))] | |
(if (or (= ch \newline) | |
(= ch \return)) | |
(Comment. (.toString sb)) | |
(do | |
(.append sb (Character/toString ch)) | |
(recur (char (.read reader)))))))) | |
(defn set-comment-reader [reader] | |
(aset (get-field clojure.lang.LispReader :macros nil) | |
(int \;) | |
reader)) | |
(defn skip-spaces [rdr] | |
(loop [c (.read rdr)] | |
(cond (= c -1) nil | |
(#{\space \tab \return \newline \,} (char c)) | |
(recur (.read rdr)) | |
:else (.unread rdr c)))) | |
(defn parse* [reader] | |
(take-while | |
:form | |
(repeatedly | |
(fn [] | |
(skip-spaces reader) | |
(let [start (.getLineNumber reader) | |
form (. clojure.lang.LispReader | |
(read reader false nil false)) | |
end (if (instance? Comment form) | |
start | |
(.getLineNumber reader))] | |
{:form form :start start :end end}))))) | |
(defn comment? [o] | |
(->> o :form (instance? Comment))) | |
(defn strip-docstring [docstring raw] | |
(-> raw | |
(replace (str \" docstring \") "") | |
(replace #"\n\s*\n" "\n") | |
(replace #"\n\s*\)" ")"))) | |
(defn extract-docstring [form raw nspace-sym] | |
(when (re-find #"^(def|ns)" (-> form first name)) | |
(let [sym (-> form second) | |
_ (when-not nspace-sym (require sym)) | |
nspace (find-ns sym)] | |
(let [docstring (if nspace | |
(-> nspace meta :doc) | |
(-> `(var ~(symbol (str nspace-sym) (str sym))) | |
eval meta :doc))] | |
[docstring | |
(strip-docstring docstring raw) | |
(if nspace sym nspace-sym)])))) | |
(defn merge-comments [f s] | |
{:form (Comment. (str (-> f :form .content) "\n" | |
(-> s :form .content))) | |
:start (:start f) | |
:end (:end s)}) | |
(defn arrange-in-sections [parsed-code raw-code] | |
(loop [sections [] | |
f (first parsed-code) | |
s (second parsed-code) | |
nn (nnext parsed-code) | |
nspace nil] | |
(if f | |
(cond | |
;; merging comments block | |
(and (comment? f) (comment? s) | |
(= (-> f :end) (-> s :start dec))) | |
(recur sections (merge-comments f s) | |
(first nn) (next nn) | |
nspace) | |
;; adding comment section | |
(comment? f) | |
(recur (conj sections (assoc f | |
:type :comment | |
:raw (-> f :form .content))) | |
s | |
(first nn) (next nn) | |
nspace) | |
;; adding code section | |
:else | |
(let [raw-code (join "\n" (subvec raw-code (-> f :start dec) (:end f))) | |
[docstring raw-code nspace] | |
(extract-docstring (:form f) raw-code nspace)] | |
(recur (conj sections (assoc f | |
:type :code | |
:raw raw-code | |
:docstring docstring)) | |
s | |
(first nn) (next nn) | |
nspace))) | |
sections))) | |
(defn parse [source-string] | |
(let [make-reader #(java.io.BufferedReader. | |
(java.io.StringReader. (str source-string "\n"))) | |
lines (vec (line-seq (make-reader))) | |
reader (clojure.lang.LineNumberingPushbackReader. (make-reader)) | |
old-cmt-rdr (aget (get-field clojure.lang.LispReader :macros nil) (int \;))] | |
(try | |
(set-comment-reader read-comment) | |
(let [parsed-code (doall (parse* reader))] | |
(set-comment-reader old-cmt-rdr) | |
(arrange-in-sections parsed-code lines)) | |
(catch Exception e | |
(set-comment-reader old-cmt-rdr) | |
(throw e))))) | |
(defn parse-file [file] | |
(parse (slurp file))) |
Fixed a bug in extract-docstring
when it had to require a namespace for the first time.
It's now being maintained in its own branch of my Marginalia fork: https://github.com/budu/marginalia/tree/new-parser
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Changed the
parse
function to put back the previously set CommentReader instead of the default one.