-
-
Save ShravanKaushik/900d1d8e25a7491be3e2 to your computer and use it in GitHub Desktop.
Download images from Google Image search using Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import json | |
import time | |
import urllib | |
import requests | |
def go(query, path, min_width=640, min_height=480):
    """
    Download full size images from Google image search.

    Don't print or republish images without permission.

    Results are requested page by page (four results per page, ``start``
    running from 0 up to 56). Every result that is at least ``min_width`` x
    ``min_height`` is saved as ``<path>/<query>/<title>.jpg``, where
    ``<title>`` is the result title reduced to its alphanumeric characters.
    A download that fails is reported on stdout and skipped.

    NOTE(review): the endpoint looks like Google's legacy AJAX Search API;
    confirm it is still reachable before relying on this function.

    query -- Well, the query string. It also names the sub-directory of
             ``path`` that receives the images.
    path -- Path to save the images to
    min_width, min_height -- Optional width and height values to declare the
                             minimum dimensions of images to download.
    """
    # urlretrieve lives in different modules on Python 2 and Python 3;
    # importing it here keeps the function usable on both.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    api_url = "https://ajax.googleapis.com/ajax/services/search/images"

    base_path = os.path.join(path, query)
    if not os.path.exists(base_path):
        os.makedirs(base_path)

    # File names handed out during this call, so that two results sharing a
    # title do not silently overwrite one another.
    used_names = set()

    start = 0  # Google's start query string parameter for pagination.
    while start < 60:  # Google will only return a max of 56 results.
        # Let requests build the query string so that spaces, '&', '%' and
        # similar characters in the query are escaped correctly.
        r = requests.get(
            api_url,
            params=[('v', '1.0'), ('q', query), ('start', start)])
        payload = json.loads(r.text)
        response_data = payload.get('responseData')
        if not response_data:
            # Presumably an error reply (no result list); stop paging
            # instead of failing on the missing data.
            print('No results returned for start=%d: %s'
                  % (start, payload.get('responseDetails')))
            break

        for i, image_info in enumerate(response_data.get('results') or []):
            # Skip images that are smaller than the requested minimum.
            if (int(image_info['width']) < min_width or
                    int(image_info['height']) < min_height):
                continue

            # Remove invalid characters from file path
            stem = "".join(
                x for x in image_info['titleNoFormatting'] if x.isalnum())
            name = '%s.jpg' % stem
            if not stem or name in used_names:
                # Empty or repeated title: add the result's overall index.
                # The stem never contains '_', so this cannot clash with a
                # plain '<title>.jpg' name.
                name = '%s_%d.jpg' % (stem, start + i)
            used_names.add(name)
            filename = os.path.join(base_path, name)

            url = image_info['unescapedUrl']
            try:
                urlretrieve(url, filename)
            except Exception:
                # Best effort: report the failure and move on to the next
                # result.
                print('Could not download %s' % url)
                continue

        print(start)
        start += 4  # 4 images per page.

        # Pause between page requests.
        time.sleep(1.5)
if __name__ == "__main__":
    # Script entry point: ask for a query and a target directory, then
    # download every matching image that is at least 1024x768.

    # raw_input only exists on Python 2; input is its Python 3 equivalent.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    query = read_line("Enter the search query> ")
    path = read_line("Enter the path to save the images "
                     "(enter . to use the current directory)> ")
    go(query, path, 1024, 768)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment