Last active
August 5, 2022 12:57
-
-
Save jakobrs/2cab4d2b5487810d3b7846ad576e9eb4 to your computer and use it in GitHub Desktop.
Code to build a tree from a list of tagsoup tags lazily
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Lib | |
( TagTree (..), | |
tagTree, | |
flattenTree, | |
parseTree, | |
renderTree, | |
) | |
where | |
import Text.StringLike (StringLike (..), fromString) | |
import Text.HTML.TagSoup | |
data TagTree str = TagBranch str [Attribute str] [TagTree str] | TagLeaf (Tag str) deriving (Functor, Eq, Ord, Show) | |
data Pair a b = Pair a b | |
instance Functor (Pair a) where | |
fmap f ~(Pair a b) = Pair a (f b) | |
sndPair (Pair _ b) = b | |
flattenTree :: [TagTree str] -> [Tag str] | |
flattenTree [] = [] | |
flattenTree (TagLeaf tag : xs) = tag : flattenTree xs | |
flattenTree (TagBranch tag attrs contents : xs) = TagOpen tag attrs : flattenTree contents ++ TagClose tag : flattenTree xs | |
tagTree :: forall str. (StringLike str) => [Tag str] -> [TagTree str] | |
tagTree = sndPair . go [] | |
where | |
go :: [str] -> [Tag str] -> Pair [Tag str] [TagTree str] | |
go [] [] = Pair [] [] | |
go (here : _) [] = Pair [] [TagLeaf (TagWarning (fromString ("Missing closing tag: " ++ toString here)))] | |
go [] (TagClose tag : xs) = (TagLeaf (TagWarning (fromString ("Unexpected closing tag: " ++ toString tag))) :) <$> go [] xs | |
go (here : stack) (TagClose tag : xs) | |
| tag == here = Pair xs [] | |
| otherwise = Pair (TagClose tag : xs) [] | |
go stack (TagOpen tag attrs : xs) | |
| tag `notElem` voidElements = | |
let (Pair rest contents) = go (tag : stack) xs | |
in (TagBranch tag attrs contents :) <$> go stack rest | |
go stack (misctag : xs) = (TagLeaf misctag :) <$> go stack xs | |
voidElements :: [str] | |
voidElements = | |
fromString | |
<$> [ "area", | |
"base", | |
"br", | |
"col", | |
"embed", | |
"hr", | |
"img", | |
"input", | |
"link", | |
"meta", | |
"source", | |
"track", | |
"wbr" | |
] | |
parseTree :: StringLike str => str -> [TagTree str] | |
parseTree = tagTree . parseTags | |
renderTree :: StringLike str => [TagTree str] -> str | |
renderTree = renderTags . flattenTree |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment