Skip to content

Instantly share code, notes, and snippets.

@yue82
Last active June 7, 2016 08:04
Show Gist options
  • Save yue82/b6bdadaf564fc2bddbf5f93a95f73cca to your computer and use it in GitHub Desktop.
photozou image crawler
import os
import urllib
import urllib2
import argparse
import json
class Crawler(object):
    """Search the Photozou public API for photos and download them.

    Typical use::

        ids = Crawler().search('cat', True, 20)
        Crawler().fetch_img(ids, './output', 'cat')

    NOTE: written for Python 2 (``urllib``/``urllib2``).
    """

    # REST endpoint for the public photo search API.
    endpoint = 'https://api.photozou.jp/rest/search_public.json'
    # '<imgurl><photo_id>' redirects to the full-size image.
    imgurl = 'http://photozou.jp/p/img/'
    # Thumbnail variant of the image URL (currently unused by the code).
    thumbrurl = 'http://photozou.jp/p/thumb/'
    # API-side cap on results per query.
    maxlimit = 1000

    def __init__(self):
        # Zero-pad width for sequence numbers in output filenames;
        # recomputed by search() once the actual limit is known.
        self.digit = len(str(self.maxlimit))

    def search(self, keyword, isthumb, limit):
        """Return a list of photo ids matching *keyword*.

        *limit* is clamped to ``maxlimit``.  *isthumb* is accepted for
        interface compatibility but is not used by the search request.
        Raises ``urllib2.URLError``/``HTTPError`` on network failure.
        """
        if limit > self.maxlimit:
            limit = self.maxlimit
        self.digit = len(str(limit))
        params = urllib.urlencode({'keyword': keyword, 'limit': limit})
        res = urllib2.urlopen('{}?{}'.format(Crawler.endpoint, params))
        try:
            resjson = json.loads(res.read())
        finally:
            # Close the connection even if the body is not valid JSON.
            res.close()
        # An empty result set may omit the 'photo' list entirely;
        # return [] instead of raising KeyError in that case.
        return [photo['photo_id']
                for photo in resjson.get('info', {}).get('photo', [])]

    def fetch_img(self, ids, dirname, filename):
        """Download each photo id in *ids* into directory *dirname*.

        ``filename is None`` -> files are named '<photo_id>.jpg';
        otherwise            -> '<filename>-<zero-padded index>.jpg'.
        Creates *dirname* if it does not exist.
        """
        # Bug fix: the output directory was previously assumed to exist.
        if not os.path.isdir(dirname):
            os.makedirs(dirname)
        for i, photo_id in enumerate(ids):  # renamed: `id` shadowed the builtin
            if filename is None:
                out_imgname = '{}.jpg'.format(photo_id)
            else:
                out_img_format = '{{}}-{{:0>{}}}.jpg'.format(self.digit)
                out_imgname = out_img_format.format(filename, i)
            out_imgpath = os.path.join(dirname, out_imgname)
            # The img URL redirects to the actual image location.
            query_url = '{}{}'.format(self.imgurl, photo_id)
            redirect_url = urllib2.urlopen(query_url).geturl()
            res = urllib2.urlopen(redirect_url)
            try:
                with open(out_imgpath, 'wb') as fo:
                    fo.write(res.read())
            finally:
                # Release the connection even if the write fails.
                res.close()
if __name__ == '__main__':
    # CLI entry point: search Photozou for a keyword and download the hits.
    parser = argparse.ArgumentParser(
        description='Download Photozou images matching a keyword.')
    parser.add_argument('keyword',
                        help='image search keyword')
    # Previously hardcoded values, now optional flags with the same defaults.
    parser.add_argument('--limit', type=int, default=20,
                        help='maximum number of images to fetch (default: 20)')
    parser.add_argument('--outdir', default='./output',
                        help='directory to save images into (default: ./output)')
    args = parser.parse_args()

    crawler = Crawler()
    # isthumb=True is passed for interface compatibility; search() ignores it.
    ids = crawler.search(args.keyword, True, args.limit)
    # Saved files are named '<keyword>-<index>.jpg'.
    crawler.fetch_img(ids, args.outdir, args.keyword)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment