Skip to content

Instantly share code, notes, and snippets.

@dradtke
Last active February 18, 2020 10:58
Show Gist options
  • Save dradtke/5817376 to your computer and use it in GitHub Desktop.
Save dradtke/5817376 to your computer and use it in GitHub Desktop.
Simple Haskell RSS downloader and parser.
module Main where
import Control.Monad
import Data.List (find)
import Data.Maybe
import Network.HTTP
import System.Exit
import System.IO
import Text.XML.Light
feedUrl :: String
feedUrl = "http://feeds.arstechnica.com/arstechnica/index?format=xml"
data Item = Item { itTitle :: String
, itLink :: String
} deriving (Show)
data Channel = Channel { chTitle :: String
, chDescription :: String
-- TODO: add last build date, language, etc.
, chItems :: [Item]
} deriving (Show)
main = do
-- get the data from the feed
feed <- simpleHTTP (getRequest feedUrl) >>= getResponseBody
-- parse it
let datums = parseXML feed
-- find the root rss node, and quit if there is none
let root' = findRoot datums
when (isNothing root') $ do putStrLn "root node not found!" ; exitFailure
let root = fromJust root'
let channels = map parseChannel $ findChildren (QName "channel" Nothing Nothing) root
hSetEncoding stdout utf8
putStrLn $ printChannel (head channels)
-- | Returns the root RSS element if it exists.
findRoot :: [Content] -> Maybe Element
findRoot = findRoot' . onlyElems
where findRoot' = find $ (== QName "rss" Nothing Nothing) . elName
-- | Returns the text content of the child with the given name. Throws an exception
-- if it wasn't found.
prop :: Element -> String -> String
prop node name = strContent . fromJust $ findChild (QName name Nothing Nothing) node
-- | Parses a <channel> node into a Channel object.
parseChannel :: Element -> Channel
parseChannel node = Channel { chTitle = title, chDescription = desc, chItems = items }
where title = prop node "title"
desc = prop node "description"
items = map parseItem $ findChildren (QName "item" Nothing Nothing) node
-- | Parses an <item> node into an Item object.
parseItem :: Element -> Item
parseItem node = Item { itTitle = title, itLink = link }
where title = prop node "title"
link = prop node "link"
printChannel :: Channel -> String
printChannel channel = fullTitle ++ "\n" ++ ['=' | _ <- fullTitle] ++ "\n" ++ content
where fullTitle = chTitle channel ++ " - " ++ chDescription channel
content = unlines $ map printItem (chItems channel)
printItem :: Item -> String
printItem item = itTitle item ++ "\n" ++ itLink item ++ "\n"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment