Last active
August 29, 2015 13:58
-
-
Save onsails/10119638 to your computer and use it in GitHub Desktop.
A simple beginner's sample that downloads and prints the pages listed in a sitemap
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Network.HTTP.Conduit | |
import qualified Data.ByteString.Lazy as L | |
import Text.XML.Light | |
import Control.Monad.Trans (liftIO) | |
import Control.Monad | |
import Data.String.Utils | |
import Control.Exception | |
import Control.Monad.Trans.Resource | |
import Control.Monad.Trans.Class | |
-- | Run an HTTP request through the given manager and return the lazy
-- response body. An 'HttpException' thrown while performing the request is
-- caught by 'try' and returned as 'Left' instead of propagating.
download :: Manager -> Request -> IO (Either HttpException L.ByteString)
download manager req = try $ do
  response <- httpLbs req manager
  return (responseBody response)
-- | Download the page at the given URL and return its lazy body.
--
-- Both stages report failure through the 'Either' result: a URL that
-- 'parseUrl' rejects is returned as 'Left' (previously that exception
-- escaped, because only the request itself was wrapped in 'try'), and a
-- failure while performing the request is returned as 'Left' by 'download'.
downloadUrl :: Manager -> String -> IO (Either HttpException L.ByteString)
downloadUrl manager url = do
  -- The result type pins the exception caught here to 'HttpException'.
  parsed <- try (parseUrl url)
  case parsed of
    Left err      -> return (Left err)
    Right request -> download manager request
-- | Build one download action per URL. Nothing is fetched here: the result
-- is a list of unexecuted 'IO' actions, in the same order as the input URLs.
getPages :: Manager -> [String] -> [IO (Either HttpException L.ByteString)]
getPages manager urls =
  [downloadUrl manager url | url <- urls]
-- | Run each download action in order and print its result (either the
-- exception or the page body) to standard output.
handlePages :: [IO (Either HttpException L.ByteString)] -> ResourceT IO ()
handlePages pages =
  -- TODO: handlePage function to do the stuff
  lift $ forM_ pages $ \fetch -> do
    result <- fetch
    print result
-- | Fetch the sitemap, collect the text of every @loc@ element in the
-- sitemap namespace, rewrite @/#!@ in each URL to @?_escaped_fragment_=@,
-- then download and print every resulting page.
--
-- If the sitemap cannot be parsed as XML the program stops with a
-- descriptive 'IOError' rather than an irrefutable-pattern failure.
main :: IO ()
main = withManager $ \manager -> do
  -- I know simpleHttp is bad here
  mapSource <- liftIO $ simpleHttp "http://www.google.com/sitemap.xml"
  case parseXMLDoc mapSource of
    Nothing ->
      liftIO . ioError . userError $
        "could not parse the sitemap as an XML document"
    Just doc ->
      handlePages . getPages manager $ map toUrl (findElements locName doc)
  where
    -- Qualified name of the <loc> element in the sitemap 0.9 namespace.
    locName = QName "loc" (Just "http://www.sitemaps.org/schemas/sitemap/0.9") Nothing
    -- Element text with the hash-bang fragment turned into a query parameter.
    toUrl = replace "/#!" "?_escaped_fragment_=" . strContent
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment