Skip to content

Instantly share code, notes, and snippets.

@DashanGao
Created April 30, 2017 16:35
Show Gist options
  • Save DashanGao/8d6635dbd24e339c9f55276467b1ca1a to your computer and use it in GitHub Desktop.
Save DashanGao/8d6635dbd24e339c9f55276467b1ca1a to your computer and use it in GitHub Desktop.
This is a Douban Album image crawler
# encoding=utf8
import BeautifulSoup
import urllib
import os
# URL of the first photo page to crawl; the crawl follows the link that
# each page's main photo points to.
initial_url = 'https://www.douban.com/photos/photo/2390342635/'
# Module-level counter that download() reads to name the saved image file.
i = 0
def download(url, index=None, file_path="C:/Users/Lenovo/Desktop/image"):
    """Save the main photo of one Douban photo page and return the next page URL.

    The page is expected to contain an element with class "image-show"
    wrapping a link with class "mainphoto"; the link's ``href`` is the next
    page to visit and its nested ``<img>`` ``src`` is the image to save.

    Args:
        url: Address of the photo page to fetch.
        index: Number used as the file name (``<index>.jpg``). When omitted,
            the module-level counter ``i`` is used, as the original code did.
        file_path: Directory the image is written to; created if missing.

    Returns:
        The ``href`` of the main-photo link, i.e. the next page URL.

    Raises:
        AttributeError, IndexError, KeyError: If the page does not have the
            expected structure.
        IOError: If fetching the page or image, or writing the file, fails.
    """
    # Function-scope import so the block stays self-contained.
    from contextlib import closing

    if index is None:
        index = i  # fall back to the module-level counter

    # closing() guarantees the HTTP response is released even on errors
    # (Python 2 urlopen objects are not context managers themselves).
    with closing(urllib.urlopen(url)) as res:
        html = res.read()
    soup = BeautifulSoup.BeautifulSoup(html)

    img_list = soup.find(attrs={"class": "image-show"})
    image_link = img_list.findAll(attrs={"class": "mainphoto"})
    main_photo = image_link[0]
    next_url = main_photo['href']
    image = main_photo.img['src']
    print(image)

    file_name = os.path.join(file_path, str(index) + ".jpg")
    # Only fetch the image when it has not been saved already.
    if not os.path.exists(file_name):
        if not os.path.isdir(file_path):
            os.makedirs(file_path)
        with closing(urllib.urlopen(image)) as img_res:
            img_data = img_res.read()
        with open(file_name, 'wb') as output:
            output.write(img_data)
        print("Finished download \n")
    return next_url
# Walk the album: start at initial_url and follow the link returned by
# download() for 60 pages.
this_url = initial_url
for i in range(60):
    # download() reads the module-level `i` to name the saved file; the
    # for statement rebinds `i` each iteration, so no manual increment
    # is needed.
    this_url = download(this_url)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment