woldr001 · September 12, 2015 21:38
diff --git a/ImageScrape.py b/ImageScrape.py
 import json
 import os
 import time
 import requests
 from PIL import Image       #If PIL is unavailable, use Pillow
 from StringIO import StringIO
 from requests.exceptions import ConnectionError


 def go(query, path):
   """Download images from Google image search and save them as JPEGs.

   Requests the result pages at offsets 0 and 4 from the Google AJAX image
   search endpoint for ``query``.  Every result is downloaded and re-encoded
   as JPEG with PIL into ``<path>/<query>/<title>.jpg``, where ``<title>``
   is the result title reduced to its alphanumeric characters.  Results
   whose titles reduce to the same name overwrite each other.

   Images that cannot be downloaded, decoded or written as JPEG are
   reported on stdout and skipped.

   Args:
     query: Search terms; also used as the name of the output directory.
     path: Directory under which the ``query`` directory is created.
   """
   SEARCH_URL = 'https://ajax.googleapis.com/ajax/services/search/images'

   BASE_PATH = os.path.join(path, query)

   if not os.path.exists(BASE_PATH):
     os.makedirs(BASE_PATH)

   start = 0  # Google's start query string parameter for pagination.
   while start < 8:  # Only the pages at start=0 and start=4 are requested.
     # Let requests build the query string so that spaces, '&' or '%' in
     # the query are escaped instead of corrupting the URL.
     r = requests.get(SEARCH_URL, params={
         'v': '1.0',
         'q': query,
         'start': start,
         'imgsz': 'large',
     })

     response_data = json.loads(r.text).get('responseData')
     if not response_data:
       # NOTE(review): presumably the API sends a null responseData when it
       # rejects a request -- confirm.  Stop paging instead of crashing.
       print('no results returned for start=%d' % start)
       break

     for image_info in response_data['results']:
       url = image_info['unescapedUrl']
       try:
         image_r = requests.get(url)
       except ConnectionError:
         print('could not download %s' % url)
         continue

       # Keep only alphanumeric characters so the title is a safe file name.
       title = image_info['titleNoFormatting']
       title = "".join(x for x in title if x.isalnum())

       # Format only the file name: applying '%' to the joined path would
       # misinterpret any '%' contained in path or query.
       file_path = os.path.join(BASE_PATH, '%s.jpg' % title)

       # Decode before touching the disk so that an undecodable download
       # does not create (or truncate) a file.
       try:
         image = Image.open(StringIO(image_r.content))
       except IOError:
         print('could not save %s' % url)
         continue

       saved = True
       # use 'wb' to write file in binary mode
       with open(file_path, 'wb') as image_file:
         try:
           image.save(image_file, 'JPEG')
         except IOError:
           # Throw away some gifs...blegh.
           saved = False

       if not saved:
         # Do not leave a truncated .jpg behind.
         os.remove(file_path)
         print('could not save %s' % url)

     print(start)
     start += 4  # 4 images per page.

     # Be nice to Google and they'll be nice back :)
     time.sleep(1.5)

 # Example: store the 'OK Computer' results in myDirectory/OK Computer/.
 go(query='OK Computer', path='myDirectory')
	import json
	import os
	import time
	import requests
	from PIL import Image #If PIL is unavailable, use Pillow
	from StringIO import StringIO
	from requests.exceptions import ConnectionError


	def go(query, path):
	  """Download images from Google image search and save them as JPEGs.

	  Requests the result pages at offsets 0 and 4 from the Google AJAX image
	  search endpoint for ``query``.  Every result is downloaded and re-encoded
	  as JPEG with PIL into ``<path>/<query>/<title>.jpg``, where ``<title>``
	  is the result title reduced to its alphanumeric characters.  Results
	  whose titles reduce to the same name overwrite each other.

	  Images that cannot be downloaded, decoded or written as JPEG are
	  reported on stdout and skipped.

	  Args:
	    query: Search terms; also used as the name of the output directory.
	    path: Directory under which the ``query`` directory is created.
	  """
	  SEARCH_URL = 'https://ajax.googleapis.com/ajax/services/search/images'

	  BASE_PATH = os.path.join(path, query)

	  if not os.path.exists(BASE_PATH):
	    os.makedirs(BASE_PATH)

	  start = 0  # Google's start query string parameter for pagination.
	  while start < 8:  # Only the pages at start=0 and start=4 are requested.
	    # Let requests build the query string so that spaces, '&' or '%' in
	    # the query are escaped instead of corrupting the URL.
	    r = requests.get(SEARCH_URL, params={
	        'v': '1.0',
	        'q': query,
	        'start': start,
	        'imgsz': 'large',
	    })

	    response_data = json.loads(r.text).get('responseData')
	    if not response_data:
	      # NOTE(review): presumably the API sends a null responseData when it
	      # rejects a request -- confirm.  Stop paging instead of crashing.
	      print('no results returned for start=%d' % start)
	      break

	    for image_info in response_data['results']:
	      url = image_info['unescapedUrl']
	      try:
	        image_r = requests.get(url)
	      except ConnectionError:
	        print('could not download %s' % url)
	        continue

	      # Keep only alphanumeric characters so the title is a safe file name.
	      title = image_info['titleNoFormatting']
	      title = "".join(x for x in title if x.isalnum())

	      # Format only the file name: applying '%' to the joined path would
	      # misinterpret any '%' contained in path or query.
	      file_path = os.path.join(BASE_PATH, '%s.jpg' % title)

	      # Decode before touching the disk so that an undecodable download
	      # does not create (or truncate) a file.
	      try:
	        image = Image.open(StringIO(image_r.content))
	      except IOError:
	        print('could not save %s' % url)
	        continue

	      saved = True
	      # use 'wb' to write file in binary mode
	      with open(file_path, 'wb') as image_file:
	        try:
	          image.save(image_file, 'JPEG')
	        except IOError:
	          # Throw away some gifs...blegh.
	          saved = False

	      if not saved:
	        # Do not leave a truncated .jpg behind.
	        os.remove(file_path)
	        print('could not save %s' % url)

	    print(start)
	    start += 4  # 4 images per page.

	    # Be nice to Google and they'll be nice back :)
	    time.sleep(1.5)

	# Example: store the 'OK Computer' results in myDirectory/OK Computer/.
	go(query='OK Computer', path='myDirectory')
No results found