Created
March 20, 2012 18:48
-
-
Save eagletmt/2139669 to your computer and use it in GitHub Desktop.
Haskellでいかにしておっぱい画像をダウンロードするか〜2012
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE OverloadedStrings, RecordWildCards, TupleSections #-} | |
-- Build-depends: | |
-- base, transformers, directory, filepath, bytestring, text, vector | |
-- , aeson, lifted-base, conduit, attoparsec-conduit, http-types, http-conduit | |
-- たぶん cabal install aeson http-conduit で全部入る。 | |
import Control.Applicative | |
import Control.Monad (void, when) | |
import Control.Monad.IO.Class | |
import Data.String (IsString) | |
import qualified Data.ByteString as S | |
import qualified Data.ByteString.Char8 as S8 | |
import qualified Data.Text as T | |
import qualified Data.Vector as V | |
import Data.Monoid ((<>)) | |
import qualified Control.Exception.Lifted as E | |
import Control.Concurrent.Lifted | |
import System.Timeout.Lifted (timeout) | |
import System.FilePath ((</>)) | |
import System.Directory (createDirectoryIfMissing) | |
import Data.Aeson | |
import Data.Conduit | |
import Data.Conduit.Binary (sinkFile) | |
import Data.Conduit.Attoparsec (sinkParser) | |
import Network.HTTP.Conduit | |
import Network.HTTP.Types | |
newtype SearchResponse = SR Image | |
instance FromJSON SearchResponse where | |
parseJSON (Object m) = SR <$> m .: "SearchResponse" | |
parseJSON _ = fail "SearchResponse" | |
data Image = ImageOk ImageResult | ImageFail (V.Vector ImageError) | |
instance FromJSON Image where | |
parseJSON (Object m) = ImageOk <$> m .: "Image" <|> ImageFail <$> m .: "Errors" | |
parseJSON _ = fail "Image" | |
data ImageError = ImageError { errCode :: Int, errParameter :: String, errMessage :: String } | |
instance FromJSON ImageError where | |
parseJSON (Object m) = ImageError <$> m .: "Code" <*> m .: "Parameter" <*> m .: "Message" | |
parseJSON _ = fail "ImageError" | |
newtype ImageResult = IR (V.Vector Media) | |
instance FromJSON ImageResult where | |
parseJSON (Object m) = IR <$> m .: "Results" | |
parseJSON _ = fail "Results" | |
newtype Media = Media { mediaUrl :: S.ByteString } | |
instance FromJSON Media where | |
parseJSON (Object m) = Media <$> m .: "MediaUrl" | |
parseJSON _ = fail "Media" | |
appId :: IsString s => s | |
appId = "APP ID" | |
baseDir :: FilePath | |
baseDir = "./data" | |
main :: IO () | |
main = do | |
createDirectoryIfMissing True baseDir | |
baseReq <- parseUrl "http://api.bing.net/json.aspx" | |
withManager $ \mgr -> sequenceWhile_ $ map (getImages baseReq mgr "おっぱい" 50) [0..] | |
sequenceWhile_ :: Monad m => [m Bool] -> m () | |
sequenceWhile_ [] = return () | |
sequenceWhile_ (a:as) = do | |
x <- a | |
when x $ sequenceWhile_ as | |
getImages :: ResourceIO m => Request m -> Manager -> T.Text -> Int -> Int -> ResourceT m Bool | |
getImages baseReq mgr query count offset = do | |
res <- http req mgr | |
j <- responseBody res $$ sinkParser json' | |
liftIO $ | |
case fromJSON j of | |
Error msg -> do | |
putStrLn $ "JSON parse error: " <> msg | |
print j | |
return False | |
Success (SR (ImageOk (IR ms))) -> do | |
downloadAll $ V.filter (".jpg" `S.isSuffixOf`) $ V.map mediaUrl ms | |
return True | |
Success (SR (ImageFail es)) -> do | |
putStrLn "API error:" | |
V.forM_ es $ \ImageError{..} -> putStrLn $ " Code " <> show errCode <> ": " <> errParameter <> ": " <> errMessage | |
return False | |
where | |
req = baseReq | |
{ queryString = renderQuery False $ queryTextToQuery $ map (\(k, v) -> (k, Just v)) | |
[ ("AppId", appId) | |
, ("Version", "2.2") | |
, ("Market", "ja-JP") | |
, ("Sources", "Image") | |
, ("Image.Count", T.pack $ show count) | |
, ("Image.Offset", T.pack $ show $ offset * count) | |
, ("Adult", "off") | |
, ("Query", query) | |
] | |
} | |
downloadAll :: V.Vector S.ByteString -> IO () | |
downloadAll urls = withManager $ \mgr -> do | |
ms <- V.replicateM (V.length urls) newEmptyMVar | |
V.forM_ (V.zip urls ms) $ \(url, mv) -> | |
fork $ void $ timeout (30 * 10^6) (download mgr url) `E.finally` putMVar mv () | |
V.mapM_ takeMVar ms | |
download :: ResourceIO m => Manager -> S.ByteString -> ResourceT m () | |
download mgr url = do | |
req <- liftIO $ parseUrl strUrl | |
liftIO $ S8.putStrLn $ "Downloading " <> url | |
flip E.catch httpError $ do | |
res <- http req mgr | |
responseBody res $$ sinkFile to | |
where | |
strUrl = S8.unpack url | |
to = baseDir </> map (\c -> if c == '/' then '-' else c) strUrl | |
httpError :: ResourceIO m => HttpException -> ResourceT m () | |
httpError err = liftIO $ putStrLn $ strUrl <> ": " <> show err |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment