Created
March 17, 2019 10:47
-
-
Save neongreen/cd7fcf98771e252410b6c538545b1b0d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE OverloadedStrings #-} | |
{-# LANGUAGE DeriveGeneric #-} | |
{-# LANGUAGE OverloadedStrings #-} | |
{-# LANGUAGE ApplicativeDo #-} | |
module Main where | |
import Data.Default | |
import Network.HTTP.Req | |
import Data.Time.Calendar | |
import qualified Data.Text as Text | |
import qualified Data.Text.IO as Text | |
import Data.Text (Text) | |
import Data.Text.Encoding | |
import Data.Foldable | |
import Fmt | |
import GHC.Generics | |
import Text.Regex.Applicative | |
import Data.Char | |
import Control.Monad | |
import Data.Maybe | |
import Text.HTML.TagSoup | |
import Text.HTML.TagSoup.Tree | |
main :: IO () | |
main = do | |
bs <- runReq def $ | |
req GET (http "www.zib.de" /: "mathematics-calendar") | |
NoReqBody | |
bsResponse | |
mempty | |
let parsed = parseTree (decodeUtf8 (responseBody bs)) | |
for_ (calendarEntries parsed) $ \entry -> do | |
pretty $ genericF $ parseCalendarEntry entry | |
putStrLn "" | |
data CalendarEntry = CalendarEntry | |
{ date :: Day | |
, author :: Text | |
, title :: Text | |
, datetime :: Text | |
, locationName :: Text | |
, locationAddress :: Text | |
} deriving (Eq, Show, Generic) | |
calendarEntries :: [TagTree Text] -> [[TagTree Text]] | |
calendarEntries t = | |
[ subtrees | |
| TagBranch _ attrs subtrees <- universeTree t | |
, ("class", "calendar-entry") `Prelude.elem` attrs | |
] | |
parseCalendarEntry :: [TagTree Text] -> CalendarEntry | |
parseCalendarEntry t = do | |
let getClass className = | |
innerText $ flattenTree $ concat $ | |
[ subtrees | |
| TagBranch _ attrs subtrees <- universeTree t | |
, ("class", className) `Prelude.elem` attrs | |
] | |
CalendarEntry | |
{ date = parseDay $ getClass "date" | |
, author = getClass "author" | |
, title = getClass "title" | |
, datetime = getClass "datetime" | |
, locationAddress = getClass "location-address" | |
, locationName = getClass "location-name" | |
} | |
parseDay :: Text -> Day | |
parseDay = fromMaybe (error "bad Day") . match regex . Text.unpack | |
where | |
digit = psym isDigit | |
regex = do | |
d <- read <$> some digit <* "." | |
m <- read <$> some digit <* "." | |
y <- read <$> replicateM 4 digit | |
pure (fromGregorian y m d) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment