Skip to content

Instantly share code, notes, and snippets.

@myanbin
Last active December 15, 2015 06:59
Show Gist options
  • Save myanbin/5220623 to your computer and use it in GitHub Desktop.
Save myanbin/5220623 to your computer and use it in GitHub Desktop.
爬取网页图片的Python脚本
#!/usr/bin/python
import urllib
import urllib2
import re
# Edit this variable to choose the number used to name the first downloaded
# image; every following image gets the next integer.
index = 6000

# Page that is scanned for images, and the directory the files are saved in.
# The directory is used as a plain string prefix, so it must end with '/'.
PAGE_URL = 'http://jandan.net/ooxx/page-600#comments'
LOCAL_DIR = '/cygdrive/c/nginx-1.2.7/html/21/images/'

# Only URLs ending in one of these (case-sensitive) suffixes are downloaded.
IMAGE_EXTENSIONS = ('.jpg', '.gif', '.png')

# Captures the src value of tags written exactly as <img src="..." />.
IMG_SRC_RE = re.compile(r'<img src="(.+?)" />')


def plan_downloads(html, first_index=index, extensions=IMAGE_EXTENSIONS):
    """Return the images found in *html* paired with their local file names.

    Args:
        html: Page source to scan with ``IMG_SRC_RE``.
        first_index: Number used for the first accepted image; it grows by
            one for every accepted image.
        extensions: URL suffixes that mark a link as a downloadable image.

    Returns:
        A list of ``(url, filename)`` tuples in page order, where *filename*
        is the running number followed by the matched suffix. URLs that end
        in none of *extensions* are skipped and do not consume a number.
    """
    plan = []
    number = first_index
    for url in IMG_SRC_RE.findall(html):
        for ext in extensions:
            if url.endswith(ext):
                plan.append((url, str(number) + ext))
                number += 1
                break
    return plan


def download_images(page_url=PAGE_URL, directory=LOCAL_DIR, first_index=index):
    """Fetch *page_url* and save every image it links to into *directory*.

    For each image a line ``<filename> <url>`` is printed, followed by
    ``ok`` or ``fault`` depending on whether the download succeeded. A failed
    download does not stop the run and still uses up its number.

    Args:
        page_url: Address of the page to scan.
        directory: Prefix prepended to each file name; must end with a path
            separator because it is joined by plain concatenation.
        first_index: Number used to name the first image.

    Returns:
        The number of images that were saved successfully.
    """
    response = urllib2.urlopen(page_url)
    try:
        html = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    saved = 0
    for url, filename in plan_downloads(html, first_index):
        print(filename + ' ' + url)
        try:
            urllib.urlretrieve(url, directory + filename)
        except Exception:
            # Best effort: one broken link must not abort the whole run.
            # Unlike a bare ``except:``, this lets KeyboardInterrupt and
            # SystemExit propagate, so Ctrl-C still stops the script.
            print('fault')
        else:
            print('ok')
            saved += 1
    return saved


if __name__ == '__main__':
    download_images(PAGE_URL, LOCAL_DIR, index)
    print('Done')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment