Last active
January 20, 2024 05:35
-
-
Save lilactown/8e42650bcc1b45736bc52f9fdfe82dfe to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns helix.util.html) | |
(require | |
'[clojure.data.xml :as xml] | |
'[clojure.walk :as walk]) | |
(import '[clojure.data.xml Element]) | |
(defn html->dom
  "Parses `html-string` with `clojure.data.xml/parse-str` and rewrites every
  parsed `Element` into a list form `(dom/<tag> attrs? child*)`.

  The attribute map is included only when it is non-empty. Any value that is
  not an `Element` (for example a text string) is returned unchanged."
  [html-string]
  (letfn [;; Builds the (dom/tag attrs? & children) form for a single element.
          (element->form [el]
            (let [tag-sym (symbol "dom" (name (:tag el)))
                  attrs   (:attrs el)]
              (seq (concat (list tag-sym)
                           (when (seq attrs) [attrs])
                           (:content el)))))
          ;; postwalk visits children first, so by the time an element is
          ;; rewritten its :content already holds the rewritten child forms.
          (rewrite [node]
            (if (instance? Element node)
              (element->form node)
              node))]
    (walk/postwalk rewrite (xml/parse-str html-string))))
;; Example: a single-root, well-formed XML snippet (note the self-closed input).
(html->dom
 "<div class=\"foo\">
  <button class=\"asdf\" aria-role=\"jkl\">qwfp</button>
  <input value=\"1\" />
  <span><a href=\"/bar\">Baz</a></span>
</div>")
;; => (dom/div {:class "foo"} (dom/button {:class "asdf", :aria-role "jkl"} "qwfp") (dom/input {:value "1"}) (dom/span (dom/a {:href "/bar"} "Baz")))
;; XML is stricter than HTML: tags without content must be closed with a slash
;; (e.g. "<input />"), and the parser doesn't handle the case where you have
;; multiple root nodes, e.g. "<div>foo</div> <div>bar</div>".
;; hickory is another library that specifically handles HTML, converting it to
;; plain data that can then be transformed.
(require | |
'[hickory.core :as h] | |
'[clojure.string :as string]) | |
;; => nil | |
(defn hick:html->dom
  "Parses `html-string` as an HTML fragment with hickory and rewrites every
  element node into a list form `(dom/<tag> attrs? child*)`.

  Before parsing, the input is split into lines, each line is trimmed, and the
  lines are joined with no separator. The attribute map is included only when
  it is non-empty. Any value that is not an element node (strings, attribute
  maps, maps whose :type is not :element) is returned unchanged.

  Returns the single form when the fragment has exactly one top-level node,
  otherwise a `(<> form*)` fragment wrapping all top-level forms."
  [html-string]
  (let [expr (->> html-string
                  (string/split-lines)  ; get rid of newlines
                  (map string/trim)     ; trim whitespace
                  (string/join "")
                  (h/parse-fragment)
                  (map h/as-hickory)
                  (walk/postwalk
                   (fn [x]
                     ;; postwalk also visits attribute maps, so a bare
                     ;; truthy-:type check would mistake {:type "text"} (from
                     ;; <input type="text">) for a node and fail on its missing
                     ;; :tag. Only rewrite maps tagged as elements.
                     (if (and (map? x) (= :element (:type x)))
                       ;; you can change the "dom" here to be any ns/alias
                       `(~(symbol "dom" (name (:tag x)))
                         ~@(when (seq (:attrs x)) [(:attrs x)])
                         ~@(:content x))
                       x))))]
    ;; parse-fragment yields a collection of zero or more top-level nodes;
    ;; if there's just one element, return that, else wrap in a fragment
    (if (= 1 (count expr))
      (first expr)
      (cons '<> expr))))
;; Example: HTML input, where the void <input> tag needs no closing slash.
(hick:html->dom "<div class=\"foo\">
  <button class=\"asdf\" aria-role=\"jkl\">qwfp</button>
  <input value=\"1\">
  <span><a href=\"/bar\">Baz</a></span>
</div>")
;; => (dom/div {:class "foo"} (dom/button {:class "asdf", :aria-role "jkl"} "qwfp") (dom/input {:value "1"}) (dom/span (dom/a {:href "/bar"} "Baz")))
What are the two different methods intended to demonstrate? That there is choice, period? Or does the `hickory` solution contain a response to the XML tag-closing & multi-root limitations?
Oh nvm, I got it
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
What are the two different methods intended to demonstrate? That there is choice, period?
Or does the `hickory` solution contain a response to the XML tag-closing & multi-root limitations?