Created
May 12, 2018 05:55
-
-
Save Conaws/e4bcaba72766f443f19d814a341fcce0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns history.parser | |
(:require [instaparse.core :as insta] | |
[debux.core :as dbg] | |
[history.util :refer [defc p makeInt]] | |
[clojure.pprint :refer [pprint] :as pp] | |
[clojure.string :as str] | |
[clojure.core.async :as async :refer [>! go go-loop <! <!! chan put!]])) | |
(defn filter-ke-tree | |
([coll] (filter-ke-tree coll #{})) | |
([x without] | |
#_(do | |
(print "\nWithout " ) | |
(print without) | |
(print "\nFilterKE: ") | |
(print x) | |
(print "\n\n-------------")) | |
(cond | |
(and (map? x) (:keep x)) | |
(let [newexclude (into #{} (:exclude x)) | |
exclude (clojure.set/union without newexclude)] | |
(-> x | |
:keep | |
(filter-ke-tree exclude) | |
flatten)) | |
(coll? x) | |
(->> x | |
(remove string? ) | |
(remove keyword?) | |
(remove (partial contains? without)) | |
(map #(filter-ke-tree % without))) | |
:else x))) | |
(def this-file "./src/clj/history/parser.clj") | |
(def between-s #"(?<=\))(\n)+(?=\()") | |
(def between-s-or-comment #"(?<=[\)(;.*)])(\n)+(?=[\((;.*)])") | |
(def get-name-from-vectors (comp second #(str/split % #"[ \n]") first)) | |
(defn ip [pstring parsable] | |
((insta/parser pstring) parsable)) | |
(defn ipt [pstring parsable] | |
((insta/parser pstring) parsable :total true )) | |
(defn ipp [pstring parsable] | |
((insta/parser pstring) parsable :partial true )) | |
(defn comparse [& s] | |
(apply str (vec (interleave s (repeat "\n"))))) | |
(defn libify [& m] (into {} (map (juxt :name identity) m))) | |
; not distinguising quoted symbols right now | |
(def clj-content | |
" S = content+ | |
<content> = w? (string / s / comment / regex / vector / map / set / symbol / keyword / number) w? | |
<w> = <#'[\\s,]+'> | |
s = ('#_' | '\\'' | '#' | '~' | '`' )? op (content | '%' )* cp | |
<comment> = #';.*' | |
vector = <'['> content* <']'> | |
map = <'{'> kv* <'}'> | |
kv = <w?> key <w?> binding | |
set = <'#{'> content* <'}'> | |
<key> = content | |
<binding> = content | |
<op> = <'('> | |
<cp> = <')'> | |
string = <'\"'> not-quote* <'\"'> | |
<not-quote> = (( <comment> | '\\\\''\"' ) / !'\"' #'[\\s\\S]') | |
symbol = (#'[^\\s:\\[\\(\\{\\]\\)\\}]+') keyword? | |
old-symbol = ( word | crazys )+ ( ':' | '\\'' | '/' |'#' | word | number | crazys)* | |
<crazys> = '*' | '$' | '!' | '&' | '.' | '+' | '?' | '<' | '>' | '-' | '_' | '=' | |
regex = <'#'> string | |
keyword = (<':'> (#'[^\\s\\[\\(\\{\\]\\)\\}]')* (#'[^\\s:\\[\\(\\{\\]\\)\\}]')+) | |
<word> = #'[a-zA-Z]+' | |
math = '+' | '-' | '/' | '=' | '*' | '<' | '<=' | '>' | '>=' | |
number = #'[0-9]+'") | |
#_(def form-transform1 | |
{:s list | |
:string str | |
:number makeInt | |
:keyword (comp keyword str) | |
:symbol (comp symbol str) | |
:map (comp (p into {}) vector) | |
:kv vector | |
:regex symbol | |
:vector vector | |
:set (comp (p into {}) vector) | |
:body str}) | |
#_(->> (ip clj-content (str '(def keep-exclude {:keep [:a :b :c {:exclude [1 2] | |
:keep [1 2 3 4 :a :b 5]}] | |
:exclude [:a :b]}))) | |
(insta/transform form-transform)) | |
#_(def testparse1 | |
(-> this-file | |
slurp | |
(str/split between-s) | |
(->> | |
(map (juxt | |
#((insta/parser clj-content) % ) | |
identity)) | |
(filter (comp (p insta/get-failure) first)) | |
print))) | |
(def test1 (str '(def clj-content | |
" S = content+ | |
<content> = (<w>? (string / s / comment / vector / map / set / symbol / keyword / number) <w>? ) | |
w = #'[\\s,]+' | |
s = ('#_' | '\\'' | '#' )? op (content | '%' )* cp | |
<comment> = #';.*' | |
vector = <'['> content* <']'> | |
map = <'{'> kv* <'}'> | |
kv = <w>? key <w>? binding | |
set = <'#{'> content* <'}'> | |
<key> = content | |
<binding> = content | |
<op> = <'('> | |
<cp> = <')'> | |
string = <'\"'> not-quote* <'\"'> | |
<not-quote> = ( <comment> / !'\"' #'[\\s\\S]') | |
symbol = ( word | crazys )+ (':' | '\\'' | '/' |'#' | word | number | crazys)* | |
<crazys> = '*' | '$' | '!' | '&' | '.' | '+' | '?' | '<' | '>' | '-' | '_' | '=' | |
keyword = (<':'> (#'[^\\s\\[\\(\\{\\]\\)\\}]')* (#'[^\\s:\\[\\(\\{\\]\\)\\}]')+) | |
<word> = #'[a-zA-Z]+' | |
math = '+' | '-' | '/' | '=' | '*' | '<' | '<=' | '>' | '>=' | |
number = #'[0-9]+'" | |
))) | |
(def debugger1 | |
"s = ('#_' | '\\'' | '#' )? '(' ( string / content / s / '%' )* ')' | |
string = <'\"'> not-quote* <'\"'> | |
<not-quote> = ( <comment> / !'\"' #'[^\"]') | |
content = #'[^\\(\\)\\\"]+' | |
<comment> = #';.*' | |
") | |
#_(pprint (ipt debugger1 test1)) | |
(defc embed-link [link-tar s] | |
(if (str/includes? s link-tar) | |
(->> (str/split s (re-pattern link-tar)) | |
(interleave (repeat [(keyword (str "a#" link-tar)) link-tar])) | |
rest | |
(into [:div])) | |
s)) | |
#_(dbg/dbg (embed-link "hello" "(defn hello abcede)")) | |
#_(embed-link "hello" "(defn helo abc hello no hello ede)") | |
(def let-example | |
(str '(defn hello [[x y]] | |
(let [a [1 2 3 4] | |
b (map (partial * 2) (conj a 5))] | |
(go | |
(let [[e f] (take 5 b) | |
{:keys [g h]} {:crazy (a b lets go now)}] | |
(insta/viz partal a g h e f c)) | |
(let [d (take 5 b)] | |
(do | |
(if (+ 5 6 a b x y))))))))) | |
(def let-pair "[x 2 | |
y (+ 3 4)] | |
(+ 1 x)") | |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
(def top-level-parser (insta/parser | |
(str | |
"file = (tops | <w> | comment | string )* | |
tops = s" | |
clj-content))) | |
#_(defn spans [t] | |
(if (sequential? t) | |
(cons | |
(while (= t :tops) | |
(insta/span t)) | |
(map spans (next t))) | |
t)) | |
(def form-transform | |
"dangerous stuff with regex here" | |
{:s list | |
:string str | |
:number makeInt | |
:keyword (comp keyword str) | |
:symbol (comp symbol str) | |
:map (comp (p into {}) vector) | |
:regex symbol | |
:kv vector | |
:vector vector | |
:set (comp (p into #{}) vector) | |
}) | |
#_(def get-strings | |
(let [f (slurp this-file) | |
p (->> (insta/parse top-level-parser f) | |
(insta/transform form-transform))] | |
(->> (take 2 p) | |
pprint))) | |
(def testsplit1 | |
(-> this-file | |
slurp | |
(str/split between-s-or-comment) | |
(->> | |
count))) | |
(def def-extractor-string | |
" <extract> = (def / ns / <experiment> ) | |
experiment = body | |
def = op <w>? type <w> name <w> body | |
body = #'[\\s\\S]*' | |
type = 'def' symbol? | |
w = #'\\s+' | |
name = symbol | |
ns = op <w>* <'ns'> <w>* nsname <w>* required <w>* cp | |
nsname = symbol | |
nspair = <'['> symbol (as|refer|<w>)* <']'> | |
required = op <':require'> (<w>* nspair)* cp | |
as = <':as'> <w>* symbol | |
refer = <':refer'> <w>* vector") | |
(def def-extractor | |
(insta/parser | |
(str | |
def-extractor-string | |
clj-content))) | |
(def ns-extractor-string | |
"ns = op <w>* <'ns'> <w>* nsname <w>* required <w>* cp | |
nsname = symbol | |
nspair = <'['> symbol (as|refer|<w>)* <']'> | |
required = op <':require'> (<w>* nspair)* cp | |
as = <':as'> <w>* symbol | |
refer = <':refer'> <w>* vector") | |
(def ns-parser | |
(insta/parser | |
(str | |
ns-extractor-string | |
clj-content))) | |
(def ns-transform | |
{:ns merge | |
:nsname (fn [n] {:name n :type :namespace}) | |
:nspair (fn [name & attrs] [name (into {} attrs)]) | |
:required (comp (fn [r] {:libraries r}) (partial into {}) vector) | |
}) | |
(defc nt [tm s] | |
(insta/transform tm s)) | |
(defc mapify [k s] | |
{k s}) | |
(def dt2 | |
{:symbol symbol | |
:def merge | |
:body nil | |
:type (comp (mapify :type) str) | |
:name (mapify :name)}) | |
;;;; current state of the art | |
(defn parse [s] | |
(let [s-f (str/split s between-s-or-comment) | |
;_ (println "S-F" s-f) | |
ds (->> s-f | |
rest | |
(map (juxt (comp (nt dt2) def-extractor) identity))) | |
;_ (println "DS" ds) | |
[[n] nst] (->> s-f | |
first | |
((juxt (comp (nt ns-transform) def-extractor) identity))) | |
;_ (println "N" n "NST" nst) | |
] | |
(cons | |
(assoc n :string nst) | |
(for [[[d] dst] ds] | |
(assoc d :string dst))))) | |
(defn parse-file [file] | |
(-> file slurp parse)) | |
(defc mapify [k s] | |
{k s}) | |
(def ke-transform {:keep (comp (mapify :keep) flatten filter-ke-tree) | |
:exclude (comp (fn [s] {:exclude s}) flatten vector) | |
:arg-exclude (p merge-with (comp flatten conj)) | |
:defn-ke (comp (mapify :keep) filter-ke-tree (p merge-with conj)) | |
}) | |
(def arg-pair "[[a b c] d] (comp a b boom c d)") | |
(def arg-extractor-string | |
(str | |
"defn-ke = arg-exclude keep+ | |
arg-exclude = <'['> ( <w> |exclude)* <']'> | |
exclude = content | |
keep = content | |
name = symbol" | |
clj-content)) | |
(def ma "(defn abcd | |
([x] (a b c x)) | |
([x y] (d e f x y)))") | |
(def sa "(defn abc [] (comp partial a b c))") | |
(defc try-transform [message transform p] | |
(try (insta/transform transform p) | |
(catch Exception e (do (print (str "Exception Caught " message "\n\n" (.getMessage e) "\nFor Transform of \n")) | |
(pprint p))))) | |
(def mconj (p merge-with conj)) | |
(defn parse2 [s] | |
(->> (try | |
((insta/parser (str | |
"TOP = def / defn / ns/ experiment | |
defn = op <w>? type <w> name (<w> op? defn-ke cp?)+ cp | |
def = op <w>? <'def'> <w> name <w> def-body cp | |
def-body = content | |
type = symbol | |
experiment = #'[\\s\\S]*' | |
name = symbol " | |
arg-extractor-string | |
ns-extractor-string | |
)) s) | |
(catch Exception e (print "GOT AN EXCeption/n/n/n" (.getMessage e)))) | |
(try-transform "on the form" form-transform) | |
(try-transform "on the custom" (merge | |
{:exclude symbol | |
:keep (mapify :keep) | |
:arg-exclude (fn [& s] {:exclude (flatten s)}) | |
:defn-ke (comp (mapify :includes) filter-ke-tree merge) | |
:def (p merge {:type :def}) | |
:defn mconj | |
:def-body (fn [&s] {:def-body (filter symbol? s)}) | |
;(comp (mapify :def-body) (p into #{}) (p keep symbol?) vector) | |
:name (mapify :name) | |
:type (mapify :type) | |
:TOP (p merge-with (comp flatten conj)) | |
} | |
ns-transform)))) | |
(def defns1 | |
(->> (parse this-file) | |
(drop 1) | |
(keep :string) | |
(keep parse2))) | |
(defn async-parse [c parse-string] | |
(go | |
(print "started parsing" (take 10 parse-string)) | |
(let [r (parse2 parse-string)] | |
(if (insta/get-failure r) | |
(put! c {:failed-string parse-string} ) | |
(put! c r)) | |
(print "put something on the chan")))) | |
(defn process-string [results-chan s] | |
(let [; results-chan (chan) | |
stuff (str/split s between-s-or-comment)] | |
(map | |
#(async-parse results-chan %) stuff))) | |
(def results-chan (chan 100)) | |
#_(process-string results-chan (slurp this-file)) | |
(def results (atom [])) | |
(defn output-loop [c db] | |
(go-loop [] | |
(let [r (<! c)] | |
(print r) | |
(swap! db conj r) | |
(recur)))) | |
(def defns1 | |
(->> (parse this-file) | |
(drop 1) | |
(keep :string) | |
(keep parse2))) | |
(defn beautify-set [s] | |
(print (apply str (interleave (repeat " \n ") s)))) | |
#_(beautify-set (set (flatten (apply concat (keep :includes defns1))))) | |
(def defns2 | |
(let [f (-> this-file | |
slurp | |
(str/split between-s-or-comment)) | |
ds (->> f | |
rest | |
(map (juxt #(try | |
(parse2 %) | |
(catch Exception e (println (.getMessage e)))) identity))) | |
[[n] nst] (->> f | |
first | |
((juxt (comp (nt ns-transform) def-extractor) identity)))] | |
ds)) | |
(def arg-extractor | |
(insta/parser | |
(str | |
arg-extractor-string | |
"<content> = (<w>? (s / comment / vector / map / string / symbol / keyword / number))" | |
))) | |
(def let-pair "[x 2 | |
y (+ 3 4)] | |
(+ 1 x)") | |
(def arg-pair "[[a b c] d] (comp a b boom c d)") | |
#_(arg-extractor arg-pair) | |
#_(->> (arg-extractor "[a b c d e] (comp {:keep [a b g d e c] :exclude [a b c]} partial a b c)") | |
(insta/transform form-transform) | |
(insta/transform ke-transform)) | |
(def let-extractor | |
(str | |
"let = <'('> <w>? <'let'> <w>? bindings <w>? keep <')'> | |
bindings = <'['> (<w>? ke-pair)* <']'> | |
ke-pair = exclude <w> keep" | |
"<content> = (<w>? (let / s / comment / vector / map / string / symbol / keyword / number))" | |
arg-extractor-string)) | |
(def let-transform | |
{:ke-pair (partial merge-with conj) | |
:bindings (partial merge-with (comp flatten conj)) | |
:let (comp (mapify :keep) filter-ke-tree (partial merge-with (comp flatten conj)))}) | |
(def let-pair "(let [x (comp partial 5 a) | |
y (+ 3 4)] | |
(+ 1 x))") | |
#_(dbg/dbg (->> ((insta/parser let-extractor) let-pair) | |
(insta/transform form-transform) | |
(insta/transform ke-transform) | |
(insta/transform let-transform))) | |
#_(clojure.pprint/pprint (keys (ns-publics 'clojure.core))) | |
(def let-extractor2 | |
(str | |
" file = ns (s | comment | w)* | |
ns = op (w | ns | within)* cp | |
def = op <w>? type <w>? name body | |
body = ((<w>? arguments? <w>? content) |<w>? arity*)+ <cp> | |
type = 'def' symbol? | |
arity = op ke-pair cp | |
<arguments> = arg-exclude | |
doc-string = string | |
name = symbol | |
let = <'('> <w>? <'let'> <w>? bindings <w>? keep <')'> | |
bindings = <'['> (<w>? ke-pair)* <']'> | |
ke-pair = exclude <w> keep" | |
" defn-ke = arg-exclude keep+ | |
arg-exclude = <'['> exclude+ <']'> | |
exclude = content | |
keep = content | |
w = #'\\s+' | |
name = symbol | |
s = ('#_' | '\\'' | '#' )? op content* cp | |
<within> = #'([^\\(\\)])*' | |
<vectorW> = #'([^\\[\\]\\s])*'")) | |
(def flatmapconj (partial merge-with (comp flatten conj))) | |
(def let-transform | |
(merge | |
{:type (mapify :type) | |
:name (mapify :name) | |
; :body vector | |
} | |
{:s list | |
:string str | |
:number makeInt | |
:keyword (comp keyword str) | |
:symbol (comp symbol str) | |
:map (comp (p into {}) vector) | |
:kv vector | |
:vector vector | |
:set (p into #{})} | |
{:keep (comp (mapify :keep) flatten filter-ke-tree) | |
:exclude (comp (fn [s] {:exclude s}) flatten vector) | |
:arg-exclude (p merge-with (comp flatten conj)) | |
:defn-ke (comp (mapify :keep) filter-ke-tree (p merge-with conj))} | |
{:ke-pair (partial merge-with conj) | |
:bindings (partial merge-with (comp flatten conj)) | |
:let (comp (mapify :keep) filter-ke-tree (partial merge-with (comp flatten conj)))})) | |
(def tops-fn | |
(fn [s] (let [r (ip let-extractor2 s)] | |
(if (insta/get-failure r) | |
s | |
(insta/transform let-transform r))))) | |
(def file-fn (comp (p filter #(= :def (first %))) vector)) | |
(def comp-transform | |
{:file vector | |
:s str | |
:within str | |
:ws nil | |
:string str | |
:comment str | |
:tops (comp (p apply str) (p take 20)) | |
}) | |
#_(def f (->> this-file | |
slurp)) | |
;; (print f) | |
#_((insta/parser let-extractor2) f) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment