Created
August 9, 2014 17:09
-
-
Save ssadler/c23175ee1c0ce3bc4bcd to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Control.Applicative | |
import Control.Monad | |
import Data.Aeson | |
import Data.Maybe | |
import qualified Data.ByteString.Lazy.Char8 as LC8 | |
import qualified Data.Map as Map | |
import Network.HTTP.Conduit | |
import Text.XML.HXT.XPath.Arrows (getXPathTrees) | |
import Text.XML.HXT.Core | |
import Text.XML.HXT.XPath | |
dataTableXpath = "//*[@id=\"load_page\"]/div/table" | |
data MedicalFacility = Hospital | Clinic | |
main :: IO () | |
main = do | |
forM_ hospitals $ \url -> simpleHttp url | |
>>= return . LC8.unpack | |
>>= processHospitalHtml | |
>>= putStrLn . LC8.unpack . encode | |
--processHospitalHtml :: String -> Hospital | |
processHospitalHtml html = do | |
parsed <- runX $ do | |
readString [withParseHTML yes, withWarnings no] html >>> | |
getXPathTrees dataTableXpath >>> | |
getChildren >>> hasName "tr" >>> cells | |
let cleaned = stripTable parsed | |
let sections = splitTable cleaned | |
let mapped = mapTable sections | |
return mapped | |
where | |
cells = listA $ multi getChildren >>> getText | |
mapTable sections = Map.fromList $ map mapSection sections | |
where | |
mapSection (t, pairs) = (t, Map.fromList pairs) | |
stripTable :: [[String]] -> [[String]] | |
stripTable = map stripRow where | |
stripRow :: [String] -> [String] | |
stripRow = catMaybes . map stripToMaybe | |
stripToMaybe :: String -> Maybe String | |
stripToMaybe input = let s = strip input in | |
if null s then Nothing else Just s | |
strip :: String -> String | |
strip = let lstrip = dropWhile (flip elem "\t\n\r ") in | |
reverse . lstrip . reverse . lstrip | |
splitTable :: [[String]] -> [(String, [(String, String)])] | |
splitTable t = if null t then [] else getSection t where | |
getSection (x:xs) = (takeHeading x, pairs) : splitTable rest | |
where (members, rest) = splitAt (length $ takeWhile ((==2) . length) xs) xs | |
pairs = map (\[a,b] -> (a,b)) members | |
takeHeading x = if length x == 1 then head x | |
else error "nos" | |
hospitals :: [String] | |
hospitals = [ "http://www.moh.ps/index.php?page=9§ion=gaza_hospitals&item=2" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_hospitals&item=6" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_hospitals&item=8" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_hospitals&item=9" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_hospitals&item=10" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_hospitals&item=15" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_hospitals&item=17" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_hospitals&item=19" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_hospitals&item=20" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_hospitals&item=22" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_hospitals&item=23" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_hospitals&item=24" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_hospitals&item=25" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_hospitals&item=26" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_hospitals&item=27" | |
] | |
clinics :: [String] | |
clinics = [ "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=10" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=102" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=103" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=13" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=15" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=16" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=17" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=3" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=4" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=5" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=57" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=58" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=59" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=6" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=77" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=79" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=85" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=9" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=90" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=96" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=98" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=76" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=14" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=7" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=8" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=2" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=1" | |
, "http://www.moh.ps/index.php?page=9§ion=gaza_clinics&item=60" | |
] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment