-
-
Save naoyeye/0f33a8174aa0f6dee0c96ad68e1506ef to your computer and use it in GitHub Desktop.
下载豆瓣相册图片
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Filename: get_douban_album_pic.py
import urllib2, re, os, sys | |
from os.path import basename | |
from urlparse import urlsplit | |
def fetch(url):
    """Return the raw body of *url*, or None if the request fails.

    Any exception raised while opening or reading the URL is printed and
    swallowed, so callers must be prepared to receive None.
    """
    try:
        response = urllib2.urlopen(url)
        try:
            return response.read()
        finally:
            # Release the connection even when read() raises.
            response.close()
    except Exception as e:
        print(e)
        return None
def get_count(url):
    """Return the number of photos reported by the album page at *url*.

    The number is read from the page's ``<span class="count">`` element.
    When the page cannot be fetched, or contains no such element, 1 is
    returned so that the caller still processes the first page.
    """
    urlContent = fetch(url)
    if urlContent is None:
        # fetch() has already printed the error; use the same fallback as
        # for a page without a counter instead of crashing on decode.
        return 1
    # Same pattern value as before; backslashes are doubled so the regex
    # escapes survive in a non-raw literal that also needs the \uXXXX escapes.
    ptn = u'<span class="count">\\(\u5171(\\d+)\u5f20\\)</span>'
    # "replace" keeps a stray invalid byte elsewhere in the page from
    # aborting the whole run.
    match = re.search(ptn, urlContent.decode("utf-8", "replace"))
    if match is None:
        return 1
    return int(match.group(1))
def down_img(url):
    """Download every photo linked from the album page at *url*.

    Thumbnail URLs found in the page are rewritten to a larger variant and
    each image is saved under ``douban/`` using the file name from its URL.
    The ``douban`` directory must already exist. Pages or images that cannot
    be fetched are skipped (fetch() prints the reason).
    """
    urlContent = fetch(url)
    if urlContent is None:
        return
    # Dots are escaped so they only match a literal "."; the numeric photo
    # id is captured here so each URL does not have to be parsed a second time.
    thumbPtn = r'(https://img\d\.douban\.com/view/photo/thumb/public/p(\d+)\.webp)'
    for thumbUrl, photoId in re.findall(thumbPtn, urlContent):
        # Photos with an id above 1770000000 all have a larger original
        # whose URL contains "large" (via: Douteng).
        if int(photoId) > 1770000000:
            imgUrl = thumbUrl.replace('thumb', 'large')
        else:
            imgUrl = thumbUrl.replace('thumb', 'photo')
        imgData = fetch(imgUrl)
        if imgData is None:
            # Skip rather than create an empty file and crash on write(None).
            continue
        fileName = basename(urlsplit(imgUrl)[2])
        with open('douban/' + fileName, 'wb') as output:
            output.write(imgData)
def download(url, page_size=18):
    """Download all photos of the album at *url*, one listing page at a time.

    url: album base URL; later pages are requested as ``url + '?start=N'``.
    page_size: number of photos per listing page, used as the step for the
        ``start`` offset (defaults to 18, the value this script has always
        used).
    """
    count = get_count(url)
    for num, start in enumerate(range(0, count, page_size), 1):
        # Format inside the call: ``print(...) % num`` only works when print
        # is parsed as a Python 2 statement.
        print("Downloading images in page %d ..." % num)
        # The first page is the bare album URL; later pages carry an offset.
        pageUrl = url if start == 0 else url + '?start=' + str(start)
        down_img(pageUrl)
    print("Finished")
def input_url():
    """Return the normalized album URL given by the user.

    The URL is taken from the first command-line argument when present,
    otherwise it is prompted for. Anything after the numeric album id
    (query string, trailing path) is dropped and a single trailing "/" is
    appended.

    Raises:
        ValueError: if the input is not a Douban album URL.
    """
    # Dots are escaped so that only the real host name is accepted.
    link = r'^(https://www\.douban\.com/photos/album/\d+)'
    if len(sys.argv) == 1:
        inputString = raw_input('Enter album url --> ')
    else:
        inputString = sys.argv[1]
    # Strip stray whitespace (e.g. from a pasted URL) before anchoring at ^.
    match = re.match(link, inputString.strip())
    if match is None:
        raise ValueError(
            'Not a Douban album url (expected '
            'https://www.douban.com/photos/album/<id>): %r' % (inputString,)
        )
    return match.group(1) + '/'
if __name__ == "__main__":
    # Images are saved into ./douban, so make sure it exists before starting.
    targetDir = 'douban'
    if not os.path.exists(targetDir):
        os.mkdir(targetDir)
    albumUrl = input_url()
    download(albumUrl)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment