Last active
February 18, 2020 10:58
-
-
Save dradtke/5817376 to your computer and use it in GitHub Desktop.
Simple Haskell RSS downloader and parser.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Main where | |
import Control.Monad | |
import Data.List (find) | |
import Data.Maybe | |
import Network.HTTP | |
import System.Exit | |
import System.IO | |
import Text.XML.Light | |
feedUrl :: String | |
feedUrl = "http://feeds.arstechnica.com/arstechnica/index?format=xml" | |
data Item = Item { itTitle :: String | |
, itLink :: String | |
} deriving (Show) | |
data Channel = Channel { chTitle :: String | |
, chDescription :: String | |
-- TODO: add last build date, language, etc. | |
, chItems :: [Item] | |
} deriving (Show) | |
main = do | |
-- get the data from the feed | |
feed <- simpleHTTP (getRequest feedUrl) >>= getResponseBody | |
-- parse it | |
let datums = parseXML feed | |
-- find the root rss node, and quit if there is none | |
let root' = findRoot datums | |
when (isNothing root') $ do putStrLn "root node not found!" ; exitFailure | |
let root = fromJust root' | |
let channels = map parseChannel $ findChildren (QName "channel" Nothing Nothing) root | |
hSetEncoding stdout utf8 | |
putStrLn $ printChannel (head channels) | |
-- | Returns the root RSS element if it exists. | |
findRoot :: [Content] -> Maybe Element | |
findRoot = findRoot' . onlyElems | |
where findRoot' = find $ (== QName "rss" Nothing Nothing) . elName | |
-- | Returns the text content of the child with the given name. Throws an exception | |
-- if it wasn't found. | |
prop :: Element -> String -> String | |
prop node name = strContent . fromJust $ findChild (QName name Nothing Nothing) node | |
-- | Parses a <channel> node into a Channel object. | |
parseChannel :: Element -> Channel | |
parseChannel node = Channel { chTitle = title, chDescription = desc, chItems = items } | |
where title = prop node "title" | |
desc = prop node "description" | |
items = map parseItem $ findChildren (QName "item" Nothing Nothing) node | |
-- | Parses an <item> node into an Item object. | |
parseItem :: Element -> Item | |
parseItem node = Item { itTitle = title, itLink = link } | |
where title = prop node "title" | |
link = prop node "link" | |
printChannel :: Channel -> String | |
printChannel channel = fullTitle ++ "\n" ++ ['=' | _ <- fullTitle] ++ "\n" ++ content | |
where fullTitle = chTitle channel ++ " - " ++ chDescription channel | |
content = unlines $ map printItem (chItems channel) | |
printItem :: Item -> String | |
printItem item = itTitle item ++ "\n" ++ itLink item ++ "\n" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment