Skip to content

Instantly share code, notes, and snippets.

@ssadler
Created August 9, 2014 17:09
Show Gist options
  • Save ssadler/c23175ee1c0ce3bc4bcd to your computer and use it in GitHub Desktop.
Save ssadler/c23175ee1c0ce3bc4bcd to your computer and use it in GitHub Desktop.
import Control.Applicative
import Control.Monad
import Data.Aeson
import Data.Maybe
import qualified Data.ByteString.Lazy.Char8 as LC8
import qualified Data.Map as Map
import Network.HTTP.Conduit
import Text.XML.HXT.XPath.Arrows (getXPathTrees)
import Text.XML.HXT.Core
import Text.XML.HXT.XPath
dataTableXpath = "//*[@id=\"load_page\"]/div/table"
data MedicalFacility = Hospital | Clinic
main :: IO ()
main = do
forM_ hospitals $ \url -> simpleHttp url
>>= return . LC8.unpack
>>= processHospitalHtml
>>= putStrLn . LC8.unpack . encode
--processHospitalHtml :: String -> Hospital
processHospitalHtml html = do
parsed <- runX $ do
readString [withParseHTML yes, withWarnings no] html >>>
getXPathTrees dataTableXpath >>>
getChildren >>> hasName "tr" >>> cells
let cleaned = stripTable parsed
let sections = splitTable cleaned
let mapped = mapTable sections
return mapped
where
cells = listA $ multi getChildren >>> getText
mapTable sections = Map.fromList $ map mapSection sections
where
mapSection (t, pairs) = (t, Map.fromList pairs)
stripTable :: [[String]] -> [[String]]
stripTable = map stripRow where
stripRow :: [String] -> [String]
stripRow = catMaybes . map stripToMaybe
stripToMaybe :: String -> Maybe String
stripToMaybe input = let s = strip input in
if null s then Nothing else Just s
strip :: String -> String
strip = let lstrip = dropWhile (flip elem "\t\n\r ") in
reverse . lstrip . reverse . lstrip
splitTable :: [[String]] -> [(String, [(String, String)])]
splitTable t = if null t then [] else getSection t where
getSection (x:xs) = (takeHeading x, pairs) : splitTable rest
where (members, rest) = splitAt (length $ takeWhile ((==2) . length) xs) xs
pairs = map (\[a,b] -> (a,b)) members
takeHeading x = if length x == 1 then head x
else error "nos"
hospitals :: [String]
hospitals = [ "http://www.moh.ps/index.php?page=9&section=gaza_hospitals&item=2"
, "http://www.moh.ps/index.php?page=9&section=gaza_hospitals&item=6"
, "http://www.moh.ps/index.php?page=9&section=gaza_hospitals&item=8"
, "http://www.moh.ps/index.php?page=9&section=gaza_hospitals&item=9"
, "http://www.moh.ps/index.php?page=9&section=gaza_hospitals&item=10"
, "http://www.moh.ps/index.php?page=9&section=gaza_hospitals&item=15"
, "http://www.moh.ps/index.php?page=9&section=gaza_hospitals&item=17"
, "http://www.moh.ps/index.php?page=9&section=gaza_hospitals&item=19"
, "http://www.moh.ps/index.php?page=9&section=gaza_hospitals&item=20"
, "http://www.moh.ps/index.php?page=9&section=gaza_hospitals&item=22"
, "http://www.moh.ps/index.php?page=9&section=gaza_hospitals&item=23"
, "http://www.moh.ps/index.php?page=9&section=gaza_hospitals&item=24"
, "http://www.moh.ps/index.php?page=9&section=gaza_hospitals&item=25"
, "http://www.moh.ps/index.php?page=9&section=gaza_hospitals&item=26"
, "http://www.moh.ps/index.php?page=9&section=gaza_hospitals&item=27"
]
clinics :: [String]
clinics = [ "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=10"
, "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=102"
, "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=103"
, "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=13"
, "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=15"
, "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=16"
, "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=17"
, "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=3"
, "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=4"
, "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=5"
, "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=57"
, "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=58"
, "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=59"
, "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=6"
, "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=77"
, "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=79"
, "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=85"
, "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=9"
, "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=90"
, "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=96"
, "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=98"
, "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=76"
, "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=14"
, "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=7"
, "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=8"
, "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=2"
, "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=1"
, "http://www.moh.ps/index.php?page=9&section=gaza_clinics&item=60"
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment