Last active
May 22, 2018 16:01
-
-
Save kirked/ecf519a3b5d809c6eac5686a6bc0b8bd to your computer and use it in GitHub Desktop.
Rudimentary EDN problem finder
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(defn parse-string
  "Scan the body of a literal string and return the [line col index] of the
  character following the closing quote.

  start-line, start-col and start-ix describe the first character inside the
  literal string, not the opening quote.

  Throws RuntimeException when text ends before a closing quote is seen. The
  message reports where input ran out and also where the scanned string began,
  so an unterminated string can be located in a large file."
  [start-line start-col start-ix text]
  (let [len (.length text)]
    (loop [line start-line
           col  start-col
           ix   start-ix]
      (if (>= ix len)
        (throw (RuntimeException.
                (str "error: end of input in string at " line ":" col " (offset " ix ")"
                     "; string content began at " start-line ":" start-col
                     " (offset " start-ix ")")))
        (let [chr (.charAt text ix)]
          (cond
            ;; A backslash escapes the next character: skip both so that an
            ;; escaped quote does not terminate the literal.
            (= chr \\)       (recur line (+ col 2) (+ ix 2))
            ;; Closing quote: report the position just past it.
            (= chr \")       [line (inc col) (inc ix)]
            ;; Raw newlines are allowed inside strings; restart the column count.
            (= chr \newline) (recur (inc line) 1 (inc ix))
            :otherwise       (recur line (inc col) (inc ix))))))))
(defn parse-comment
  "Skip over a comment and return the index of the first character of the
  next line.

  Scanning starts at start-ix. The result is one past the first newline found
  at or after start-ix. When no newline is found the result is the length of
  text, or start-ix itself if start-ix already lies at or beyond the end."
  [start-ix text]
  (let [end (long (.length text))]
    (or
     ;; Index just past the first newline at or after start-ix, if any.
     (some (fn [pos]
             (when (= \newline (.charAt text pos))
               (inc pos)))
           (range start-ix end))
     ;; No newline: the comment runs to the end of the input.
     (max start-ix end))))
(defn- parse-collection-impl
  "Worker behind parse-collection. Identical contract, plus pairs?: when true
  the collection is a map and an odd number of forms is reported on close."
  [term-chr pairs? start-line start-col start-ix text]
  (let [len (.length text)]
    (loop [line    start-line
           col     start-col
           ix      start-ix
           forms   0
           ;; true when the previous character ended a token, so the next
           ;; token character starts a new form
           ws-last true]
      (if (>= ix len)
        (throw (RuntimeException.
                (str "error: end of input in collection at " line ":" col " (offset " ix ")"
                     "; collection content began at " start-line ":" start-col
                     " (offset " start-ix ")")))
        (let [chr (.charAt text ix)
              ;; one character of lookahead, nil at end of input
              nxt (when (< (inc ix) len) (.charAt text (inc ix)))]
          (cond
            (= chr term-chr)
            (do
              (when (and pairs? (odd? forms))
                (println "odd number of map values detected at" (str line ":" col " (offset " ix ")")))
              [line (inc col) (inc ix) forms])

            ;; Character literal (\a, \}, \", \; ...): consume the backslash
            ;; together with the next character so it is never mistaken for a
            ;; delimiter, string opener or comment start. Longer names such as
            ;; \newline are finished off by the plain-token branch below.
            (= chr \\)
            (let [forms' (if ws-last (inc forms) forms)]
              (if (= nxt \newline)
                (recur (inc line) 1 (+ ix 2) forms' false)
                (recur line (+ col 2) (+ ix 2) forms' false)))

            (= chr \")
            (let [[new-line new-col new-ix] (parse-string line (inc col) (inc ix) text)]
              (recur new-line new-col new-ix (inc forms) false))

            ;; Set literal: a single form whose element count may be odd, so
            ;; it must not go through the map pairing check.
            (and (= chr \#) (= nxt \{))
            (let [[new-line new-col new-ix] (parse-collection-impl \} false line (+ col 2) (+ ix 2) text)]
              (recur new-line new-col new-ix (inc forms) false))

            (= chr \{)
            (let [[new-line new-col new-ix] (parse-collection-impl \} true line (inc col) (inc ix) text)]
              (recur new-line new-col new-ix (inc forms) false))

            (= chr \[)
            (let [[new-line new-col new-ix] (parse-collection-impl \] false line (inc col) (inc ix) text)]
              (recur new-line new-col new-ix (inc forms) false))

            (= chr \()
            (let [[new-line new-col new-ix] (parse-collection-impl \) false line (inc col) (inc ix) text)]
              (recur new-line new-col new-ix (inc forms) false))

            ;; Comment: skip to the start of the next line. Only advance the
            ;; line counter when a newline was actually consumed; a comment
            ;; that runs to end of input stays on the current line.
            (= chr \;)
            (let [next-ix (parse-comment (inc ix) text)]
              (if (= \newline (.charAt text (dec next-ix)))
                (recur (inc line) 1 next-ix forms true)
                (recur line (+ col (- next-ix ix)) next-ix forms true)))

            (= chr \newline)
            (recur (inc line) 1 (inc ix) forms true)

            ;; \return is whitespace too, so CRLF line endings do not register
            ;; as extra forms.
            (contains? #{\space \tab \, \return} chr)
            (recur line (inc col) (inc ix) forms true)

            ;; Any other character belongs to a plain token (symbol, keyword,
            ;; number, ...); it counts as a new form only at a token start.
            :otherwise
            (recur line (inc col) (inc ix) (if ws-last (inc forms) forms) false)))))))

(defn parse-collection
  "Parse a literal collection, returning [line col index forms]: the position
  of the character following the closing delimiter and the number of forms
  counted directly inside the collection.

  term-chr is the closing delimiter to look for. start-line, start-col and
  start-ix should point to the first character inside the literal collection,
  not the opening character.

  When term-chr is } the collection is treated as a map, and a message is
  printed if it closes with an odd number of forms. Nested maps, vectors,
  lists, #{...} sets, strings, comments and character literals are skipped
  over; sets are not subject to the odd-count check.

  NOTE: other # dispatch forms (tagged elements, #_ discard) are not treated
  specially; the # token and the value after it are counted as separate forms.

  Throws RuntimeException when text ends before the closing delimiter."
  [term-chr start-line start-col start-ix text]
  (parse-collection-impl term-chr (= term-chr \}) start-line start-col start-ix text))
(defn parse-top
  "Entry point: scan text as a single top-level EDN value and report problems.

  Leading whitespace (space, tab, comma, return, newline) and ; comments are
  skipped while line and column are tracked, so positions reported by the
  nested parsers refer to the real location in text.

  Returns:
    - \"empty string\" when text contains nothing but whitespace and comments
    - the result of parse-string when the first value is a string
    - the result of parse-collection when the first value is a map, list or
      vector
    - \"invalid EDN\" when the first value starts with any other character"
  [text]
  (let [len (.length text)]
    (loop [line 1
           col  1
           ix   0]
      (if (>= ix len)
        "empty string"
        (let [chr (.charAt text ix)]
          (cond
            (= chr \newline)
            (recur (inc line) 1 (inc ix))

            (contains? #{\space \tab \, \return} chr)
            (recur line (inc col) (inc ix))

            ;; parse-comment takes [start-ix text] and returns the index of the
            ;; next line. If the comment ran to end of input the loop ends on
            ;; the next iteration, so bumping the line here is always safe.
            (= chr \;)
            (recur (inc line) 1 (parse-comment (inc ix) text))

            (= chr \") (parse-string line (inc col) (inc ix) text)
            (= chr \{) (parse-collection \} line (inc col) (inc ix) text)
            (= chr \() (parse-collection \) line (inc col) (inc ix) text)
            (= chr \[) (parse-collection \] line (inc col) (inc ix) text)
            :otherwise "invalid EDN"))))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I had a large EDN file for which the real reader just reported `maps must contain an even number of forms`, which of course wasn't very helpful. So this is a rudimentary EDN reader that will report where problems occur.
Note: this code needs to be adjusted for any file that starts with a comment or whitespace. A leading comment will only parse the comment and return, and leading whitespace will currently report invalid EDN. All of this is in `parse-top`. The code found my problem, so I'm on to doing production things until I have time to enhance parse-top.