Skip to content

Instantly share code, notes, and snippets.

@yuitest
Last active August 29, 2015 13:59
Show Gist options
  • Save yuitest/10588181 to your computer and use it in GitHub Desktop.
Save yuitest/10588181 to your computer and use it in GitHub Desktop.
derpibooru から、 JSON と画像を取得する。ループが 3 秒ぐらいになってる。負荷を掛けないように各自要調整。
# coding: utf8
from __future__ import division, print_function, unicode_literals
import json
import mimetypes
import os
import requests
class DerpibooruDocument(object):
    """One derpibooru image record plus on-demand access to its image file.

    Wraps the decoded JSON dictionary describing an image. The image itself
    is downloaded lazily as a streamed HTTP response, which is cached on the
    instance so its headers can be inspected before the body is written out.
    """

    # Seconds to wait on the server before giving up on a request; without a
    # timeout a single stalled connection would block the caller forever.
    REQUEST_TIMEOUT = 30

    def __init__(self, json_dict):
        """Keep *json_dict* (the decoded JSON record); no I/O happens here."""
        self._data = json_dict
        self._image_request = None

    def image_url(self):
        """Return the absolute URL of the image.

        ``http:`` is prepended to the record's ``image`` value (presumably a
        protocol-relative ``//host/path`` string). A value that already
        starts with ``http://`` or ``https://`` is returned unchanged.

        Raises:
            KeyError: if the record has no ``image`` entry.
        """
        location = self._data['image']
        if location.startswith(('http://', 'https://')):
            return location
        return 'http:' + location

    def _fetch_image(self):
        """Return the streamed response for the image, requesting it once.

        Raises:
            requests.RequestException: on connection problems, timeouts or
                an HTTP error status.
        """
        if self._image_request is not None:
            return self._image_request
        response = requests.get(
            self.image_url(), stream=True, timeout=self.REQUEST_TIMEOUT)
        # Raises for error statuses so a failed response is never cached.
        response.raise_for_status()
        self._image_request = response
        return response

    def guess_fetched_image_ext(self):
        """Return a file extension (leading dot included) for the image.

        The ``Content-Type`` response header is consulted first. When it is
        absent or not recognised, the extension of the response URL is used
        instead, which may be an empty string.
        """
        response = self._fetch_image()
        content_type = response.headers.get('content-type', '')
        # mimetypes only matches the bare type, so drop "; charset=..." etc.
        mime = content_type.split(';', 1)[0].strip()
        ext = mimetypes.guess_extension(mime) if mime else None
        if ext is not None:
            # Some mimetypes tables answer ".jpe" for image/jpeg.
            return '.jpg' if ext == '.jpe' else ext
        # Keep query string and fragment out of the extension.
        path = response.url.split('#', 1)[0].split('?', 1)[0]
        return os.path.splitext(path)[1]

    def save_image(self, fo):
        """Write the image body to the binary file object *fo* in chunks."""
        response = self._fetch_image()
        for chunk in response.iter_content(1024):
            fo.write(chunk)

    @classmethod
    def from_number(cls, num):
        """Build a document from the image id *num*."""
        url = 'http://derpibooru.org/{}.json'.format(num)
        return cls.from_url(url)

    @classmethod
    def from_url(cls, url):
        """Build a document from the JSON served at *url*.

        Raises:
            requests.RequestException: on connection problems or timeouts.
            ValueError: if the response body is not valid JSON.
        """
        record = requests.get(url, timeout=cls.REQUEST_TIMEOUT).json()
        return cls(record)

    def to_file(self, fo):
        """Serialise the JSON record to the file object *fo*."""
        json.dump(self._data, fo)
if __name__ == '__main__':
    import sys
    import time

    # Crawl every image id in [FIRST_ID, LAST_ID) and store its JSON record
    # under jsons/ and its image under images/. One pass takes about three
    # seconds on purpose; adjust the delays so the site is not put under load.
    FIRST_ID = 0
    LAST_ID = 599500
    DELAY_BEFORE_REQUEST = 1
    DELAY_AFTER_DOWNLOAD = 2

    try:
        id_range = xrange  # Python 2: avoid building a 600k-element list
    except NameError:
        id_range = range

    # Create each directory on its own: with a shared try block, an already
    # existing 'images' directory would skip the creation of 'jsons'.
    for directory in ('images', 'jsons'):
        try:
            os.makedirs(directory)
        except OSError:
            if not os.path.isdir(directory):
                raise

    for num in id_range(FIRST_ID, LAST_ID):
        time.sleep(DELAY_BEFORE_REQUEST)
        try:
            d = DerpibooruDocument.from_number(num)
        except Exception as err:
            # Best effort: report the failure and move on to the next id.
            print('{}: could not fetch JSON: {!r}'.format(num, err),
                  file=sys.stderr)
            continue
        if 'image' not in d._data:
            continue
        # Text mode: json.dump writes str, which a binary file rejects on
        # Python 3.
        with open('jsons/{}.json'.format(num), 'w') as fo:
            d.to_file(fo)
        try:
            ext = d.guess_fetched_image_ext()
        except Exception as err:
            print('{}: could not fetch image: {!r}'.format(num, err),
                  file=sys.stderr)
            continue
        image_path = 'images/{}{}'.format(num, ext)
        try:
            with open(image_path, 'wb') as fio:
                d.save_image(fio)
        except requests.RequestException as err:
            # The transfer broke midway: discard the truncated file rather
            # than leaving it to be mistaken for a complete image.
            print('{}: download interrupted: {!r}'.format(num, err),
                  file=sys.stderr)
            os.remove(image_path)
            continue
        time.sleep(DELAY_AFTER_DOWNLOAD)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment