Last active
August 29, 2015 14:07
-
-
Save glts/b382b72ef29165b7857a to your computer and use it in GitHub Desktop.
Trampolining lexer for J sentences
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Trampolining lexer for J sentences | |
;; Predicates | |
(def digits (set "0123456789_")) | |
(def alphabet (set "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")) | |
(def space? (partial contains? #{\space \tab})) | |
(def alpha-n? (partial contains? (disj alphabet \N))) | |
(def n? (partial = \N)) | |
(def b? (partial = \B)) | |
(def alnum? (partial contains? (into digits alphabet))) | |
(def alnum-b? (partial contains? (disj (into digits alphabet) \B))) | |
(def digit? (partial contains? digits)) | |
(def digit'? (partial contains? (conj (into digits alphabet) \.))) | |
(def period? (partial = \.)) | |
(def colon? (partial = \:)) | |
(def dots? (partial contains? #{\. \:})) | |
(def quote? (partial = \')) | |
(defmacro emit [f cs lex token & xs] | |
`(~f ~cs (conj ~lex ~token) ~@xs)) | |
(defmacro transit [f cs lex & xs] | |
`(~f ~cs ~lex ~@xs)) | |
;; State functions | |
(declare | |
s-space | |
s-space' | |
s-other | |
s-alpha | |
s-n | |
s-nb | |
s-nz | |
s-digits | |
s-even-quotes | |
s-quote) | |
(defn s-space [[c & cs] lex & curr] | |
(cond | |
(space? c) (recur cs lex nil) | |
(alpha-n? c) (transit s-alpha cs lex (str c)) | |
(n? c) (transit s-n cs lex (str c)) | |
(digit? c) (transit s-digits cs lex (str c)) | |
(quote? c) (transit s-quote cs lex (str c)) | |
c (transit s-other cs lex (str c)) | |
:else lex)) | |
(defn s-space' [[c & cs] lex curr spaces] | |
(cond | |
(space? c) (recur cs lex (str curr) (str spaces c)) | |
(alpha-n? c) (emit s-alpha cs lex [:digits curr] (str c)) | |
(n? c) (emit s-n cs lex [:digits curr] (str c)) | |
(digit? c) (transit s-digits cs lex (str curr spaces c)) | |
(quote? c) (emit s-quote cs lex [:digits curr] (str c)) | |
c (emit s-other cs lex [:digits curr] (str c)) | |
:else (conj lex [:digits curr]))) | |
(defn s-other [[c & cs] lex curr] | |
(cond | |
(dots? c) (recur cs lex (str curr c)) | |
(space? c) (emit s-space cs lex [:other curr]) | |
(alpha-n? c) (emit s-alpha cs lex [:other curr] (str c)) | |
(n? c) (emit s-n cs lex [:other curr] (str c)) | |
(digit? c) (emit s-digits cs lex [:other curr] (str c)) | |
(quote? c) (emit s-quote cs lex [:other curr] (str c)) | |
c (recur cs (conj lex [:other curr]) (str c)) | |
:else (conj lex [:other curr]))) | |
(defn s-alpha [[c & cs] lex curr] | |
(cond | |
(alnum? c) (recur cs lex (str curr c)) | |
(space? c) (emit s-space cs lex [:alpha curr]) | |
(dots? c) (transit s-other cs lex (str curr c)) | |
(quote? c) (emit s-quote cs lex [:alpha curr] (str c)) | |
c (emit s-other cs lex [:alpha curr] (str c)) | |
:else (conj lex [:alpha curr]))) | |
(defn s-n [[c & cs] lex curr] | |
(cond | |
(space? c) (emit s-space cs lex [:alpha curr]) | |
(alnum-b? c) (transit s-alpha cs lex (str curr c)) | |
(b? c) (transit s-nb cs lex (str curr c)) | |
(dots? c) (transit s-other cs lex (str curr c)) | |
(quote? c) (emit s-quote cs lex [:alpha curr] (str c)) | |
c (emit s-other cs lex [:alpha curr] (str c)) | |
:else (conj lex [:alpha curr]))) | |
(defn s-nb [[c & cs] lex curr] | |
(cond | |
(space? c) (emit s-space cs lex [:alpha curr]) | |
(alnum? c) (transit s-alpha cs lex (str curr c)) | |
(period? c) (transit s-nz cs lex (str curr c)) | |
(colon? c) (transit s-other cs lex (str curr c)) | |
(quote? c) (emit s-quote cs lex [:alpha curr] (str c)) | |
c (emit s-other cs lex [:alpha curr] (str c)) | |
:else (conj lex [:alpha curr]))) | |
(defn s-nz [[c & cs :as s] lex curr] | |
(if (dots? c) | |
(transit s-other cs lex (str curr c)) | |
(conj lex [:comment (apply str curr s)]))) | |
(defn s-digits [[c & cs] lex curr] | |
(cond | |
(digit'? c) (recur cs lex (str curr c)) | |
(space? c) (transit s-space' cs lex (str curr) (str c)) | |
(colon? c) (transit s-other cs lex (str curr c)) | |
(quote? c) (emit s-quote cs lex [:digits curr] (str c)) | |
c (emit s-other cs lex [:digits curr] (str c)) | |
:else (conj lex [:digits curr]))) | |
(defn s-quote [[c & cs] lex curr] | |
(cond | |
(quote? c) (transit s-even-quotes cs lex (str curr c)) | |
c (recur cs lex (str curr c)) | |
:else (conj lex [:error "open quote"]))) | |
(defn s-even-quotes [[c & cs] lex curr] | |
(cond | |
(space? c) (emit s-space cs lex [:string curr]) | |
(alpha-n? c) (emit s-alpha cs lex [:string curr] (str c)) | |
(n? c) (emit s-n cs lex [:string curr] (str c)) | |
(digit? c) (emit s-digits cs lex [:string curr] (str c)) | |
(quote? c) (transit s-quote cs lex (str curr c)) | |
c (emit s-other cs lex [:string curr] (str c)) | |
:else (conj lex [:string curr]))) | |
;; Lexer | |
(defn run-lexer [s] | |
{:pre [(re-matches #"[\p{Graph}\p{Blank}]*" s)]} | |
(trampoline s-space s [])) | |
(run-lexer "6754e3 8b2.762 +./ 89.89 'what '' ief' blbab NB. what") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment