Skip to content

Instantly share code, notes, and snippets.

@eagletmt
Created March 20, 2012 18:48
Show Gist options
  • Save eagletmt/2139669 to your computer and use it in GitHub Desktop.
Save eagletmt/2139669 to your computer and use it in GitHub Desktop.
Haskellでいかにしておっぱい画像をダウンロードするか〜2012
{-# LANGUAGE OverloadedStrings, RecordWildCards, TupleSections #-}
-- Build-depends:
-- base, transformers, directory, filepath, bytestring, text, vector
-- , aeson, lifted-base, conduit, attoparsec-conduit, http-types, http-conduit
-- たぶん cabal install aeson http-conduit で全部入る。
import Control.Applicative
import Control.Monad (void, when)
import Control.Monad.IO.Class
import Data.String (IsString)
import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
import qualified Data.Text as T
import qualified Data.Vector as V
import Data.Monoid ((<>))
import qualified Control.Exception.Lifted as E
import Control.Concurrent.Lifted
import System.Timeout.Lifted (timeout)
import System.FilePath ((</>))
import System.Directory (createDirectoryIfMissing)
import Data.Aeson
import Data.Conduit
import Data.Conduit.Binary (sinkFile)
import Data.Conduit.Attoparsec (sinkParser)
import Network.HTTP.Conduit
import Network.HTTP.Types
newtype SearchResponse = SR Image
instance FromJSON SearchResponse where
parseJSON (Object m) = SR <$> m .: "SearchResponse"
parseJSON _ = fail "SearchResponse"
data Image = ImageOk ImageResult | ImageFail (V.Vector ImageError)
instance FromJSON Image where
parseJSON (Object m) = ImageOk <$> m .: "Image" <|> ImageFail <$> m .: "Errors"
parseJSON _ = fail "Image"
data ImageError = ImageError { errCode :: Int, errParameter :: String, errMessage :: String }
instance FromJSON ImageError where
parseJSON (Object m) = ImageError <$> m .: "Code" <*> m .: "Parameter" <*> m .: "Message"
parseJSON _ = fail "ImageError"
newtype ImageResult = IR (V.Vector Media)
instance FromJSON ImageResult where
parseJSON (Object m) = IR <$> m .: "Results"
parseJSON _ = fail "Results"
newtype Media = Media { mediaUrl :: S.ByteString }
instance FromJSON Media where
parseJSON (Object m) = Media <$> m .: "MediaUrl"
parseJSON _ = fail "Media"
appId :: IsString s => s
appId = "APP ID"
baseDir :: FilePath
baseDir = "./data"
main :: IO ()
main = do
createDirectoryIfMissing True baseDir
baseReq <- parseUrl "http://api.bing.net/json.aspx"
withManager $ \mgr -> sequenceWhile_ $ map (getImages baseReq mgr "おっぱい" 50) [0..]
sequenceWhile_ :: Monad m => [m Bool] -> m ()
sequenceWhile_ [] = return ()
sequenceWhile_ (a:as) = do
x <- a
when x $ sequenceWhile_ as
getImages :: ResourceIO m => Request m -> Manager -> T.Text -> Int -> Int -> ResourceT m Bool
getImages baseReq mgr query count offset = do
res <- http req mgr
j <- responseBody res $$ sinkParser json'
liftIO $
case fromJSON j of
Error msg -> do
putStrLn $ "JSON parse error: " <> msg
print j
return False
Success (SR (ImageOk (IR ms))) -> do
downloadAll $ V.filter (".jpg" `S.isSuffixOf`) $ V.map mediaUrl ms
return True
Success (SR (ImageFail es)) -> do
putStrLn "API error:"
V.forM_ es $ \ImageError{..} -> putStrLn $ " Code " <> show errCode <> ": " <> errParameter <> ": " <> errMessage
return False
where
req = baseReq
{ queryString = renderQuery False $ queryTextToQuery $ map (\(k, v) -> (k, Just v))
[ ("AppId", appId)
, ("Version", "2.2")
, ("Market", "ja-JP")
, ("Sources", "Image")
, ("Image.Count", T.pack $ show count)
, ("Image.Offset", T.pack $ show $ offset * count)
, ("Adult", "off")
, ("Query", query)
]
}
downloadAll :: V.Vector S.ByteString -> IO ()
downloadAll urls = withManager $ \mgr -> do
ms <- V.replicateM (V.length urls) newEmptyMVar
V.forM_ (V.zip urls ms) $ \(url, mv) ->
fork $ void $ timeout (30 * 10^6) (download mgr url) `E.finally` putMVar mv ()
V.mapM_ takeMVar ms
download :: ResourceIO m => Manager -> S.ByteString -> ResourceT m ()
download mgr url = do
req <- liftIO $ parseUrl strUrl
liftIO $ S8.putStrLn $ "Downloading " <> url
flip E.catch httpError $ do
res <- http req mgr
responseBody res $$ sinkFile to
where
strUrl = S8.unpack url
to = baseDir </> map (\c -> if c == '/' then '-' else c) strUrl
httpError :: ResourceIO m => HttpException -> ResourceT m ()
httpError err = liftIO $ putStrLn $ strUrl <> ": " <> show err
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment