myanbin · December 28, 2015 13:49
diff --git a/spider.py b/spider.py
 #!/usr/bin/env python

 """This spider finds the images on http://jandan.net/ooxx and
 downloads them to a local directory."""

 import urllib
 import urllib2
 import re

 # Edit this variable to choose the number used to name the first downloaded
 # image; getImagesByUrl() increments it after each download attempt.
 index = 1000

 def getImagesByUrl(url):
    """Download every .jpg/.gif/.png image referenced by the page at `url`.

    The page is fetched with urllib2 and scanned for tags of the exact form
    ``<img src="..." />``.  Each image whose URL ends in one of the supported
    extensions is saved as ``<index><extension>`` inside the hard-coded
    download directory, where ``index`` is the module-level counter.  The
    counter is advanced after every download attempt, successful or not, so
    a failed download leaves a gap in the numbering.

    Args:
        url: Address of the HTML page to scan.

    Raises:
        Any error raised while fetching the page itself propagates to the
        caller.  Failures of individual image downloads are reported as
        'fault' on stdout and do not stop the loop.
    """
    global index

    localdir = '/cygdrive/c/nginx-1.2.7/html/21/images/'
    extensions = ('.jpg', '.gif', '.png')

    # Close the HTTP response explicitly instead of leaking the connection.
    response = urllib2.urlopen(url)
    try:
        html = response.read()
    finally:
        response.close()

    imgUrls = re.findall(r'<img src="(.+?)" />', html)

    for imgUrl in imgUrls:
        # First supported extension the URL ends with, or None to skip it.
        extension = next(
            (ext for ext in extensions if imgUrl.endswith(ext)), None)
        if extension is None:
            continue

        filename = str(index) + extension
        # Single-argument print(...) prints the same text as the print
        # statement used elsewhere in this script.
        print(filename + ' ' + imgUrl)

        try:
            urllib.urlretrieve(imgUrl, localdir + filename)
        except Exception:
            # Best effort: note the failure and carry on.  Unlike a bare
            # "except:", this lets KeyboardInterrupt/SystemExit through so
            # the crawl can still be aborted.
            print('fault')
        else:
            print('ok')

        index = index + 1

 if __name__ == '__main__':
    # Crawl comment pages 100 through 149, one page per call.
    prefix = 'http://jandan.net/ooxx/page-'
    suffix = '#comments'
    for page in range(100, 150):
        getImagesByUrl(prefix + str(page) + suffix)
	#!/usr/bin/env python

	"""This spider finds the images on http://jandan.net/ooxx and
	downloads them to a local directory."""

	import urllib
	import urllib2
	import re

	# Edit this variable to choose the number used to name the first downloaded
	# image; getImagesByUrl() increments it after each download attempt.
	index = 1000

	def getImagesByUrl(url):
	    """Download every .jpg/.gif/.png image referenced by the page at `url`.

	    The page is fetched with urllib2 and scanned for tags of the exact
	    form ``<img src="..." />``.  Each image whose URL ends in one of the
	    supported extensions is saved as ``<index><extension>`` inside the
	    hard-coded download directory, where ``index`` is the module-level
	    counter.  The counter is advanced after every download attempt,
	    successful or not, so a failed download leaves a gap in the numbering.

	    Args:
	        url: Address of the HTML page to scan.

	    Raises:
	        Any error raised while fetching the page itself propagates to the
	        caller.  Failures of individual image downloads are reported as
	        'fault' on stdout and do not stop the loop.
	    """
	    global index

	    localdir = '/cygdrive/c/nginx-1.2.7/html/21/images/'
	    extensions = ('.jpg', '.gif', '.png')

	    # Close the HTTP response explicitly instead of leaking the connection.
	    response = urllib2.urlopen(url)
	    try:
	        html = response.read()
	    finally:
	        response.close()

	    imgUrls = re.findall(r'<img src="(.+?)" />', html)

	    for imgUrl in imgUrls:
	        # First supported extension the URL ends with, or None to skip it.
	        extension = next(
	            (ext for ext in extensions if imgUrl.endswith(ext)), None)
	        if extension is None:
	            continue

	        filename = str(index) + extension
	        # Single-argument print(...) prints the same text as the print
	        # statement used elsewhere in this script.
	        print(filename + ' ' + imgUrl)

	        try:
	            urllib.urlretrieve(imgUrl, localdir + filename)
	        except Exception:
	            # Best effort: note the failure and carry on.  Unlike a bare
	            # "except:", this lets KeyboardInterrupt/SystemExit through so
	            # the crawl can still be aborted.
	            print('fault')
	        else:
	            print('ok')

	        index = index + 1

	if __name__ == '__main__':
	    # Crawl comment pages 100 through 149, one page per call.
	    first_page, stop_page = 100, 150
	    for page_number in range(first_page, stop_page):
	        page_url = 'http://jandan.net/ooxx/page-' + str(page_number) + '#comments'
	        getImagesByUrl(page_url)