Created
May 3, 2013 20:50
-
-
Save digizeph/5513994 to your computer and use it in GitHub Desktop.
Haskell代码,用来下载时光网(mtime.com)安妮海瑟薇的相册。工作不稳定,有可能被时光网打飞,如果下载不动,请重新运行,不会重复下载!【添加功能,能够通过命令行参数方式输入需要下载的地址首页】
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import System.Directory | |
import System.IO | |
import System.Environment | |
import Network.HTTP | |
import Network.URI (parseURI) | |
import Text.Regex.Posix | |
import Data.Maybe | |
import Control.Monad -- when | |
import qualified Data.ByteString as B | |
-- mtime.cn Anne Hathaway's photo albums' main page | |
anneHathaway = "http://people.mtime.com/924124/photo_gallery/images.html" | |
taylorSwift = "http://people.mtime.com/1494195/photo_gallery/images.html" | |
parse [] = [anneHathaway,taylorSwift] | |
parse xs = xs | |
main = do | |
-- Start from main page. | |
args <- getArgs | |
let mainPage = parse args | |
mapM (processPage) (mainPage) | |
where | |
processPage p | |
| p=="" = putStrLn "empty match!" >> return () | |
| otherwise = do | |
putStrLn p | |
let src = openURL p | |
pages <- fmap (spage) (src) | |
mapM (process) (pages) | |
putStrLn "" | |
putStrLn "Photos download finished." | |
nextlink <- fmap (findNext ) (src) | |
processPage nextlink | |
{- | |
Get web contents | |
-} | |
-- Retrieve a html source code of a webpage. | |
openURL url = let uri = case parseURI url of | |
Nothing -> error $ "Invalid page URL:" ++ url | |
Just u -> u in | |
simpleHTTP (getRequest url) >>= getResponseBody | |
-- Download a picture from one webpage | |
openPIC url = let uri = case parseURI url of | |
Nothing -> error $ "Invalid pic URL:" ++ url | |
Just u -> u in | |
simpleHTTP (defaultGETRequest_ uri) >>= getResponseBody | |
{- | |
Regex pattern matching for URLs | |
-} | |
-- Searching for photo-containing webpage, and return the URL | |
spage xs = filter (/="") $ map search (lines xs) | |
where search x = x =~pat ::String | |
pat = "(http://people.mtime.com/[0-9]+/photo_gallery/[0-9]+/)" | |
-- On each photo webpage, find and download the large photo | |
process page = do | |
picPage <- openURL page | |
let picLink = ((picPage =~ "\n(.*)block mauto"::String) =~ "(http.*jpg)"::String) | |
if picLink=="" then -- in case of no pic matches | |
putStrLn page | |
else do | |
let dirName = page =~ "([0-9]+)" | |
createDirectoryIfMissing False dirName | |
let filename = dirName ++ "/" ++(picLink=~"([0-9]+\\.[0-9]+\\.jpg)") | |
ex <- doesFileExist filename | |
if ex | |
then | |
putStr "-" | |
else do | |
jpg <- openPIC picLink | |
B.writeFile filename jpg | |
putStr "." | |
hFlush stdout | |
-- Find next menu page containing links to photo pages. | |
findNext src = | |
case nextlink of | |
"" -> "" | |
l -> "http://people.mtime.com"++l | |
where nextlink = ((src =~ "ml10 next(.*)\n"::String) =~ "(/[0-9]+/photo_gallery/images-[0-9]+\\.html)"::String) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
👍