Skip to content

Instantly share code, notes, and snippets.

@ShravanKaushik
Forked from crizCraig/gist:2816295
Last active September 1, 2015 15:53
Show Gist options
  • Save ShravanKaushik/900d1d8e25a7491be3e2 to your computer and use it in GitHub Desktop.
Save ShravanKaushik/900d1d8e25a7491be3e2 to your computer and use it in GitHub Desktop.
Download images from Google Image search using Python
import os
import json
import time
import urllib
import requests
def go(query, path, min_width=640, min_height=480):
    """Download full-size images from Google image search.

    Don't print or republish images without permission.

    Results are fetched four at a time for start offsets 0 through 56, with
    a 1.5 second pause between pages; the offset of each finished page is
    printed. Paging stops early if a response carries no result data.

    Args:
        query: The search terms. They are URL-encoded by ``requests``, so
            spaces and characters such as ``&``, ``#`` or ``%`` are safe.
        path: Directory in which a sub-directory named after ``query`` is
            created (if missing) to hold the downloaded images.
        min_width: Images whose reported width is smaller are skipped.
        min_height: Images whose reported height is smaller are skipped.

    Raises:
        ValueError: If a response body is not valid JSON.
    """
    # Function-scope import keeps the block runnable on Python 2 and 3
    # without touching the file's top-level imports.
    try:  # Python 2
        from urllib import urlretrieve
    except ImportError:  # Python 3
        from urllib.request import urlretrieve

    # NOTE(review): Google announced the deprecation of this AJAX search
    # API -- confirm the endpoint still responds before relying on it.
    search_url = "https://ajax.googleapis.com/ajax/services/search/images"

    base_path = os.path.join(path, query)
    if not os.path.exists(base_path):
        os.makedirs(base_path)

    used_names = set()  # File stems already written during this run.
    start = 0  # Google's start query string parameter for pagination.
    while start < 60:  # Google will only return a max of 56 results.
        # Passing params lets requests percent-encode the query instead of
        # splicing raw user input into the URL.
        response = requests.get(
            search_url, params={'v': '1.0', 'q': query, 'start': start})
        payload = json.loads(response.text)
        response_data = payload.get('responseData')
        if not response_data:
            # A null responseData means the service rejected the request
            # (e.g. paging out of range); indexing into it would crash.
            print('Search stopped at start=%d: %s'
                  % (start, payload.get('responseDetails')))
            break

        results = response_data.get('results') or []
        for index, image_info in enumerate(results):
            too_small = (int(image_info['width']) < min_width or
                         int(image_info['height']) < min_height)
            if too_small:
                continue

            # Remove invalid characters from the file name; fall back to a
            # fixed stem when nothing alphanumeric is left.
            title = image_info['titleNoFormatting']
            stem = "".join(ch for ch in title if ch.isalnum()) or 'image'
            if stem in used_names:
                # Two results with the same title must not overwrite each
                # other, so disambiguate with the absolute result index.
                stem = '%s_%d' % (stem, start + index)
            used_names.add(stem)
            filename = os.path.join(base_path, '%s.jpg' % stem)

            url = image_info['unescapedUrl']
            try:
                urlretrieve(url, filename)
            except Exception:
                # Deliberately best-effort: one dead link should not abort
                # the whole run.
                print('Could not download %s' % url)

        print(start)
        start += 4  # 4 images per page.
        time.sleep(1.5)
if __name__ == "__main__":
    # raw_input only exists on Python 2; on Python 3 the equivalent is input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    query = read_line("Enter the search query> ")
    path = read_line("Enter the path to save the images "
                     "(enter . to use the current directory)> ")
    # Only keep images that are at least 1024x768.
    go(query, path, 1024, 768)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment