Created
November 18, 2019 18:58
-
-
Save fakedrake/006cfcb6c18fa0f6f14d812800bd2c5d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE OverloadedStrings #-}

-- | Example scraper built on scalpel: collects the comments found on an
-- article page into a list of 'Comment' values.
--
-- @OverloadedStrings@ is enabled so that plain string literals can be used
-- where scalpel expects tag names in selectors.
--
-- NOTE(review): the export list is empty, so nothing defined in this module
-- is visible to importers. Confirm whether @allComments@ (and the 'Comment'
-- type) are meant to be exported.
module Angelos () where

import Control.Applicative
import Text.HTML.Scalpel
-- | Name of the person who wrote a comment, kept as the raw scraped text.
type Author = String

-- | A single comment scraped from the page. A comment carries either a
-- textual body or a reference to an image, never both.
data Comment
  = TextComment Author String
    -- ^ Author together with the text body of the comment.
  | ImageComment Author URL
    -- ^ Author together with the URL taken from the comment's image.
  deriving (Show, Eq)
-- | Fetch @http:\/\/example.com\/article.html@ and scrape every comment on it.
--
-- The result is whatever 'scrapeURL' yields for the local @comments@
-- scraper: 'Nothing' when scraping does not succeed, otherwise the list of
-- comments that were recognised.
allComments :: IO (Maybe [Comment])
allComments = scrapeURL "http://example.com/article.html" comments
  where
    -- Apply 'comment' inside each @<div class="container">@ selected by
    -- 'chroots' and collect the results.
    comments :: Scraper String [Comment]
    comments = chroots ("div" @: [hasClass "container"]) comment

    -- Try the text form first; fall back to the image form if it fails.
    comment :: Scraper String Comment
    comment = textComment <|> imageComment

    -- Both kinds of comment read their author from the same element, so the
    -- selector lives in one place.
    author :: Scraper String Author
    author = text $ "span" @: [hasClass "author"]

    -- Author followed by the inner text of @<div class="text">@.
    textComment :: Scraper String Comment
    textComment =
      TextComment
        <$> author
        <*> text ("div" @: [hasClass "text"])

    -- Author followed by the @src@ attribute of @<img class="image">@.
    imageComment :: Scraper String Comment
    imageComment =
      ImageComment
        <$> author
        <*> attr "src" ("img" @: [hasClass "image"])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment