Skip to content

Instantly share code, notes, and snippets.

@vito
Created August 15, 2010 00:24
Show Gist options
  • Save vito/524881 to your computer and use it in GitHub Desktop.
Save vito/524881 to your computer and use it in GitHub Desktop.
module A.AutoFlow where
import Data.Char (isSpace)
import Text.HTML.TagSoup
autoFlow :: String -> String
autoFlow = renderTags . autoFlow' False . canonicalizeTags . parseTags
where
autoFlow' open []
| open = [TagClose "p"]
| otherwise = []
autoFlow' open (o@(TagOpen n _):ts)
-- inline element, already in a <p>; carry on
| isPhrasing n && open = o : autoFlow' open ts
-- inline element, not in a <p>; start a <p>
| isPhrasing n = TagOpen "p" [] : o : autoFlow' True ts
-- Opening a block element; </p>
| open = TagClose "p" : autoFlow' False (o:ts)
-- A block that can contain <p>
| isFlowable n = o : autoFlow' False tagContents ++ [TagClose n] ++ autoFlow' False rest
-- Not <p>-able; leave it alone
| otherwise = o : tagContents ++ [TagClose n] ++ autoFlow' False rest
where
tagContents = getTagContent n (o:ts)
rest = drop (length tagContents + 1) ts
autoFlow' open (t@(TagText c):ts)
-- skip whitespace
| all isSpace c = t : autoFlow' open ts
-- a new paragraph, indicated by a linebreak
| open && head c == '\n' = TagClose "p" : autoFlow' False (t:ts)
-- a new paragraph, indicated by a linebreak
| head c == '\n' = TagOpen "p" [] : autoFlow' True (TagText (dropWhile (== '\n') c) : ts)
-- already open; carry on
| open = thisPara : autoFlow' True rest
-- text, pop it in a <p>
| otherwise = TagOpen "p" [] : t : autoFlow' True ts
where
thisPara = TagText (takeWhile (/= '\n') c)
rest
| thisPara == t = ts
| otherwise = TagText (dropWhile (/= '\n') c) : ts
autoFlow' True (t@(TagClose n):ts)
| isBlock n =
TagClose "p" : t : autoFlow' False ts
autoFlow' open (t:ts) = t : autoFlow' open ts
isPhrasing :: String -> Bool
isPhrasing = flip elem . words $
"a abbr area audio b bdo br button canvas cite code command datelist del dfn em embed i iframe img input ins kbd keygen label link map mark math meta meter noscript object output progress q ruby samp script select small span strong sub sup svg textarea time var video wbr"
-- flow content, excluding phrasing
isBlock :: String -> Bool
isBlock = flip elem . words $
"article aside blockquote details div dl fieldset figure footer form h1 h2 h3 h4 h5 h6 header hgroup hr menu nav ol p pre section style table ul"
isFlow :: String -> Bool
isFlow x = isPhrasing x || isBlock x
isFlowable :: String -> Bool
isFlowable = flip elem . words $
"div"
-- grab a tag's content, including nested instances of the same tag
getTagContent :: String -> [Tag String] -> [Tag String]
getTagContent n ts' = getTagContent' 0 [] ts'
where
getTagContent' :: Int -> [Tag String] -> [Tag String] -> [Tag String]
-- error: reached the end with an unclosed tag
getTagContent' _ _ [] = error $ "unmatched tag " ++ n ++ " in: " ++ show ts'
-- final closing tag
getTagContent' 1 acc (TagClose tn:_) | tn == n =
reverse acc
-- initial opening tag
getTagContent' 0 acc (TagOpen tn _:ts) | tn == n =
getTagContent' 1 acc ts
-- nested open tag
getTagContent' d acc (TagOpen tn as:ts) | tn == n =
getTagContent' (d + 1) (TagOpen tn as:acc) ts
-- nested close tag
getTagContent' d acc (TagClose tn:ts) | tn == n =
getTagContent' (d - 1) (TagClose tn:acc) ts
-- content
getTagContent' d acc (t:ts) =
getTagContent' d (t:acc) ts
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment