Last active
January 15, 2016 02:38
-
-
Save gfredericks/95eb9142071cc0760cad to your computer and use it in GitHub Desktop.
Clojure peg memoization example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns clojure-peg-memoization-example | |
"A followup to Sean Cribbs' presentation on packrat parsers: | |
https://github.com/seancribbs/pwl-chicago-1/ | |
The goal is to show that in an impure language like Clojure we can | |
bolt on memoization after the fact and get the same performance | |
advantage as the Haskell implementation without bending over | |
backwards -- i.e., we maintain the same algorithm structure as a | |
recursive descent parser. | |
There are a few practical-performance and style issues with this | |
code -- I was aiming to merely demonstrate the point in the previous | |
paragraph and nothing more. | |
All the parsing functions in this namespace take a single string | |
argument and return a pair [val remaining-string], where val is | |
an integer, as in the Haskell code, or nil if the string does | |
not match.") | |
(declare p-additive) | |
(defn p-decimal | |
[s] | |
(when (<= (int \0) (int (first s)) (int \9)) | |
[(Long/parseLong (subs s 0 1)) | |
(subs s 1)])) | |
(defn p-primary | |
[s] | |
(if (= \( (first s)) | |
(if-let [[val s'] (p-additive (subs s 1))] | |
(if (= \) (first s')) | |
[val (subs s' 1)] | |
(p-decimal s)) | |
(p-decimal s)) | |
(p-decimal s))) | |
(defn p-multitive | |
[s] | |
(if-let [[val1 s'] (p-primary s)] | |
(if (= \* (first s')) | |
(if-let [[val2 s''] (p-multitive (subs s' 1))] | |
[(* val1 val2) s''] | |
(p-primary s)) | |
(p-primary s)) | |
(p-primary s))) | |
(defn p-additive | |
[s] | |
(if-let [[val1 s'] (p-multitive s)] | |
(if (= \+ (first s')) | |
(if-let [[val2 s''] (p-additive (subs s' 1))] | |
[(+ val1 val2) s''] | |
(p-multitive s)) | |
(p-multitive s)) | |
(p-multitive s))) | |
;; add tracing logs | |
(doseq [v [#'p-decimal #'p-primary #'p-multitive #'p-additive] | |
:let [name (-> v meta :name)]] | |
(alter-var-root v (fn [f] | |
(fn [& args] | |
(let [ret (apply f args)] | |
(printf "%s ==> %s\n" | |
(pr-str (list* name args)) | |
(pr-str ret)) | |
ret))))) | |
;; Try it without memoization: | |
;; | |
;; user> (p-additive "2*(3+4)") | |
;; (p-decimal "2*(3+4)") ==> [2 "*(3+4)"] | |
;; (p-primary "2*(3+4)") ==> [2 "*(3+4)"] | |
;; (p-decimal "3+4)") ==> [3 "+4)"] | |
;; (p-primary "3+4)") ==> [3 "+4)"] | |
;; (p-decimal "3+4)") ==> [3 "+4)"] | |
;; (p-primary "3+4)") ==> [3 "+4)"] | |
;; (p-multitive "3+4)") ==> [3 "+4)"] | |
;; (p-decimal "4)") ==> [4 ")"] | |
;; (p-primary "4)") ==> [4 ")"] | |
;; (p-decimal "4)") ==> [4 ")"] | |
;; (p-primary "4)") ==> [4 ")"] | |
;; (p-multitive "4)") ==> [4 ")"] | |
;; (p-decimal "4)") ==> [4 ")"] | |
;; (p-primary "4)") ==> [4 ")"] | |
;; (p-decimal "4)") ==> [4 ")"] | |
;; (p-primary "4)") ==> [4 ")"] | |
;; (p-multitive "4)") ==> [4 ")"] | |
;; (p-additive "4)") ==> [4 ")"] | |
;; (p-additive "3+4)") ==> [7 ")"] | |
;; (p-primary "(3+4)") ==> [7 ""] | |
;; (p-decimal "3+4)") ==> [3 "+4)"] | |
;; (p-primary "3+4)") ==> [3 "+4)"] | |
;; (p-decimal "3+4)") ==> [3 "+4)"] | |
;; (p-primary "3+4)") ==> [3 "+4)"] | |
;; (p-multitive "3+4)") ==> [3 "+4)"] | |
;; (p-decimal "4)") ==> [4 ")"] | |
;; (p-primary "4)") ==> [4 ")"] | |
;; (p-decimal "4)") ==> [4 ")"] | |
;; (p-primary "4)") ==> [4 ")"] | |
;; (p-multitive "4)") ==> [4 ")"] | |
;; (p-decimal "4)") ==> [4 ")"] | |
;; (p-primary "4)") ==> [4 ")"] | |
;; (p-decimal "4)") ==> [4 ")"] | |
;; (p-primary "4)") ==> [4 ")"] | |
;; (p-multitive "4)") ==> [4 ")"] | |
;; (p-additive "4)") ==> [4 ")"] | |
;; (p-additive "3+4)") ==> [7 ")"] | |
;; (p-primary "(3+4)") ==> [7 ""] | |
;; (p-multitive "(3+4)") ==> [7 ""] | |
;; (p-multitive "2*(3+4)") ==> [14 ""] | |
;; (p-decimal "2*(3+4)") ==> [2 "*(3+4)"] | |
;; (p-primary "2*(3+4)") ==> [2 "*(3+4)"] | |
;; (p-decimal "3+4)") ==> [3 "+4)"] | |
;; (p-primary "3+4)") ==> [3 "+4)"] | |
;; (p-decimal "3+4)") ==> [3 "+4)"] | |
;; (p-primary "3+4)") ==> [3 "+4)"] | |
;; (p-multitive "3+4)") ==> [3 "+4)"] | |
;; (p-decimal "4)") ==> [4 ")"] | |
;; (p-primary "4)") ==> [4 ")"] | |
;; (p-decimal "4)") ==> [4 ")"] | |
;; (p-primary "4)") ==> [4 ")"] | |
;; (p-multitive "4)") ==> [4 ")"] | |
;; (p-decimal "4)") ==> [4 ")"] | |
;; (p-primary "4)") ==> [4 ")"] | |
;; (p-decimal "4)") ==> [4 ")"] | |
;; (p-primary "4)") ==> [4 ")"] | |
;; (p-multitive "4)") ==> [4 ")"] | |
;; (p-additive "4)") ==> [4 ")"] | |
;; (p-additive "3+4)") ==> [7 ")"] | |
;; (p-primary "(3+4)") ==> [7 ""] | |
;; (p-decimal "3+4)") ==> [3 "+4)"] | |
;; (p-primary "3+4)") ==> [3 "+4)"] | |
;; (p-decimal "3+4)") ==> [3 "+4)"] | |
;; (p-primary "3+4)") ==> [3 "+4)"] | |
;; (p-multitive "3+4)") ==> [3 "+4)"] | |
;; (p-decimal "4)") ==> [4 ")"] | |
;; (p-primary "4)") ==> [4 ")"] | |
;; (p-decimal "4)") ==> [4 ")"] | |
;; (p-primary "4)") ==> [4 ")"] | |
;; (p-multitive "4)") ==> [4 ")"] | |
;; (p-decimal "4)") ==> [4 ")"] | |
;; (p-primary "4)") ==> [4 ")"] | |
;; (p-decimal "4)") ==> [4 ")"] | |
;; (p-primary "4)") ==> [4 ")"] | |
;; (p-multitive "4)") ==> [4 ")"] | |
;; (p-additive "4)") ==> [4 ")"] | |
;; (p-additive "3+4)") ==> [7 ")"] | |
;; (p-primary "(3+4)") ==> [7 ""] | |
;; (p-multitive "(3+4)") ==> [7 ""] | |
;; (p-multitive "2*(3+4)") ==> [14 ""] | |
;; (p-additive "2*(3+4)") ==> [14 ""] | |
;; [14 ""] | |
;; memoize everything | |
(doseq [v [#'p-decimal #'p-primary #'p-multitive #'p-additive]] | |
(alter-var-root v memoize)) | |
;; Run it again with memoization: | |
;; | |
;; user> (p-additive "2*(3+4)") | |
;; (p-decimal "2*(3+4)") ==> [2 "*(3+4)"] | |
;; (p-primary "2*(3+4)") ==> [2 "*(3+4)"] | |
;; (p-decimal "3+4)") ==> [3 "+4)"] | |
;; (p-primary "3+4)") ==> [3 "+4)"] | |
;; (p-multitive "3+4)") ==> [3 "+4)"] | |
;; (p-decimal "4)") ==> [4 ")"] | |
;; (p-primary "4)") ==> [4 ")"] | |
;; (p-multitive "4)") ==> [4 ")"] | |
;; (p-additive "4)") ==> [4 ")"] | |
;; (p-additive "3+4)") ==> [7 ")"] | |
;; (p-primary "(3+4)") ==> [7 ""] | |
;; (p-multitive "(3+4)") ==> [7 ""] | |
;; (p-multitive "2*(3+4)") ==> [14 ""] | |
;; (p-additive "2*(3+4)") ==> [14 ""] | |
;; [14 ""] |
This is also not practical as is, since it's a global memoization -- i.e., it would be leaky for multiple parses.
Clojure actually doesn't have a built-in easy way to define locally-memoized mutually recursive functions, but I think a macro similar to this could do it.
Or maybe just using the memoized-fn
macro repeatedly would work.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Yes, aside from the direct memoization of function calls you do, this is essentially the strategy I took in
neotoma
: a combinator that handles the memoization by wrapping calls to the parsing functions.I think the effect is the same, but the approach in the paper is different in that the Haskell code memoizes implicitly by virtue of whole-program laziness and data-recursion.