Skip to content

Instantly share code, notes, and snippets.

@agassiyzh
Created August 12, 2012 08:23
Show Gist options
  • Save agassiyzh/3330667 to your computer and use it in GitHub Desktop.
Save agassiyzh/3330667 to your computer and use it in GitHub Desktop.
豆瓣相册下载脚本
'''
Created on Aug 7, 2011
@author: Agassi
'''
#_*_encoding:utf-8_*_
from lxml import html
import os
import urllib2
def format_html_from_url(url):
    """Fetch ``url`` and parse the response body into an lxml HTML document.

    Args:
        url: Address handed unchanged to ``urllib2.urlopen``.

    Returns:
        The document built by ``html.document_fromstring`` from the
        downloaded content.

    Exceptions raised while opening, reading or parsing are not caught here
    and propagate to the caller.
    """
    # Local import keeps this function self-contained.
    from contextlib import closing

    # closing() guarantees response.close() runs even if read() raises, so
    # the underlying connection is not leaked.
    with closing(urllib2.urlopen(url)) as response:
        content = response.read()
    return html.document_fromstring(content)
def _download_photos(doc):
    """Download the full-size image for every thumbnail found in ``doc``.

    Args:
        doc: Parsed HTML document exposing ``xpath``; thumbnails are the
            ``img/@src`` values inside the ``photolst clearfix`` container.

    Each thumbnail URL has ``thumb`` replaced by ``photo`` and is handed to
    ``wget -nc -P album``. An ``OSError`` from ``subprocess.call`` (for
    example when ``wget`` cannot be executed) is not caught.
    """
    # Local import keeps the helper self-contained.
    import subprocess

    thumbnails = doc.xpath('//div[@class="photolst clearfix"]/div/a/img/@src')
    for thumb_url in thumbnails:
        photo_url = thumb_url.replace('thumb', 'photo')
        # Pass an argument list instead of a shell string: the URL comes from
        # a scraped page, so it must never be interpreted by a shell.
        subprocess.call(['wget', '-nc', '-P', 'album', photo_url])


if __name__ == '__main__':
    # Example input: http://www.douban.com/photos/album/75440685/
    album_url = raw_input("Album url:>>>")
    first_page = format_html_from_url(album_url)
    _download_photos(first_page)

    # Follow the paginator links found on the first page. Links that repeat
    # (or point back at the start URL) are skipped so no page is fetched twice.
    visited = set([album_url])
    for page_url in first_page.xpath('//div[@class="paginator"]/a/@href'):
        if page_url in visited:
            continue
        visited.add(page_url)
        _download_photos(format_html_from_url(page_url))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment