Last active
December 20, 2015 06:29
-
-
Save zippera/6086025 to your computer and use it in GitHub Desktop.
python爬虫,下载点点「美女」标签下的大图。 更新:缩短文件名;排除杂图;改善提示信息 使用方法:新建一个文件夹,把代码保存为name.py文件,运行 「python name.py」就可以把图片下载到文件夹。 计划:使用压缩;使用多线程;创建相对路径文件夹。
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # -*- coding: utf-8 -*- | |
| #--------------------------------------- | |
| # 程序:点点美女图片爬虫 | |
| # 版本:0.2 | |
| # 作者:zippera | |
| # 日期:2013-07-26 | |
| # 语言:Python 2.7 | |
| # 说明:能设置下载的页数 | |
| #--------------------------------------- | |
| import urllib2 | |
| import urllib | |
| import re | |
# Number of listing pages to crawl, starting at page 1.  Raise this value to
# download more pages.
PAGE_LIMIT = 1

# Listing URL for the tag; the page number is appended to it.
TAG_PAGE_URL = "http://www.diandian.com/tag/%E7%BE%8E%E5%A5%B3?page="

# Captures the URL held in the `imgsrc` attribute found on the line right
# after a "feed-big-img" container div.
IMAGE_URL_PATTERN = re.compile(
    r'<div class="feed-big-img">\n.*?imgsrc="(ht.*?)"')

# Captures the last ten word characters plus the extension at the very end of
# a URL; this is used as the (shortened) local file name.
FILE_NAME_PATTERN = re.compile(r'(\w{10}\.\w+)$')


def extract_image_urls(html):
    """Return the list of big-image URLs found in one listing page's HTML.

    The list is empty when the page contains no matching image markup.
    """
    return IMAGE_URL_PATTERN.findall(html)


def local_filename(image_url):
    """Return the shortened local file name for ``image_url``.

    Returns ``None`` when the URL does not end with at least ten word
    characters followed by a dot and an extension; such URLs are skipped by
    the downloader.
    """
    match = FILE_NAME_PATTERN.search(image_url)
    if match is None:
        return None
    return match.group(1)


def fetch_page(page_number):
    """Download listing page ``page_number`` and return it decoded as UTF-8."""
    response = urllib2.urlopen(TAG_PAGE_URL + str(page_number))
    try:
        # Decode the raw bytes so the patterns run on text.
        return response.read().decode("utf-8")
    finally:
        # Always release the connection, even if reading or decoding fails.
        response.close()


def download_page_images(page_number):
    """Save every matching image of one listing page into the current directory.

    Progress is printed for each image.  Images whose URL yields no usable
    file name are listed but not downloaded.
    """
    print("Page " + str(page_number) + "\n")
    image_urls = extract_image_urls(fetch_page(page_number))
    if not image_urls:
        print("no data")
        return
    for index, image_url in enumerate(image_urls, 1):
        print("Page" + str(page_number) + " No." + str(index)
              + " url: " + image_url + "\n")
        file_name = local_filename(image_url)
        if file_name is None:
            continue
        try:
            urllib.urlretrieve(image_url, file_name)
        except IOError as error:
            # One broken image must not abort the rest of the crawl.
            print("download failed: " + str(error))


def main():
    """Crawl the first ``PAGE_LIMIT`` listing pages in order."""
    for page_number in range(1, PAGE_LIMIT + 1):
        download_page_images(page_number)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment