Skip to content

Instantly share code, notes, and snippets.

@jakobrs
Last active August 5, 2022 12:57
Show Gist options
  • Save jakobrs/2cab4d2b5487810d3b7846ad576e9eb4 to your computer and use it in GitHub Desktop.
Save jakobrs/2cab4d2b5487810d3b7846ad576e9eb4 to your computer and use it in GitHub Desktop.
Code to build a tree from a list of tagsoup tags lazily
module Lib
( TagTree (..),
tagTree,
flattenTree,
parseTree,
renderTree,
)
where
import Text.StringLike (StringLike (..), fromString)
import Text.HTML.TagSoup
data TagTree str = TagBranch str [Attribute str] [TagTree str] | TagLeaf (Tag str) deriving (Functor, Eq, Ord, Show)
data Pair a b = Pair a b
instance Functor (Pair a) where
fmap f ~(Pair a b) = Pair a (f b)
sndPair (Pair _ b) = b
flattenTree :: [TagTree str] -> [Tag str]
flattenTree [] = []
flattenTree (TagLeaf tag : xs) = tag : flattenTree xs
flattenTree (TagBranch tag attrs contents : xs) = TagOpen tag attrs : flattenTree contents ++ TagClose tag : flattenTree xs
tagTree :: forall str. (StringLike str) => [Tag str] -> [TagTree str]
tagTree = sndPair . go []
where
go :: [str] -> [Tag str] -> Pair [Tag str] [TagTree str]
go [] [] = Pair [] []
go (here : _) [] = Pair [] [TagLeaf (TagWarning (fromString ("Missing closing tag: " ++ toString here)))]
go [] (TagClose tag : xs) = (TagLeaf (TagWarning (fromString ("Unexpected closing tag: " ++ toString tag))) :) <$> go [] xs
go (here : stack) (TagClose tag : xs)
| tag == here = Pair xs []
| otherwise = Pair (TagClose tag : xs) []
go stack (TagOpen tag attrs : xs)
| tag `notElem` voidElements =
let (Pair rest contents) = go (tag : stack) xs
in (TagBranch tag attrs contents :) <$> go stack rest
go stack (misctag : xs) = (TagLeaf misctag :) <$> go stack xs
voidElements :: [str]
voidElements =
fromString
<$> [ "area",
"base",
"br",
"col",
"embed",
"hr",
"img",
"input",
"link",
"meta",
"source",
"track",
"wbr"
]
parseTree :: StringLike str => str -> [TagTree str]
parseTree = tagTree . parseTags
renderTree :: StringLike str => [TagTree str] -> str
renderTree = renderTags . flattenTree
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment