Skip to content

Instantly share code, notes, and snippets.

@crizCraig
Created May 27, 2012 22:52
Show Gist options
  • Save crizCraig/2816295 to your computer and use it in GitHub Desktop.
Save crizCraig/2816295 to your computer and use it in GitHub Desktop.
Download images from Google Image search using Python
import json
import os
import time
import requests
from PIL import Image
from StringIO import StringIO
from requests.exceptions import ConnectionError
def _save_as_jpeg(content, target):
    """Decode ``content`` with PIL and write it to ``target`` as a JPEG.

    Args:
        content: Raw bytes of the downloaded image.
        target: Path of the file to create.

    Returns:
        True when the file was written, False when PIL could not read or
        convert the data (or the file could not be created).
    """
    from io import BytesIO  # Byte buffer available on Python 2.6+ and 3.

    try:
        image = Image.open(BytesIO(content))
    except IOError:
        # Not something PIL recognises as an image; nothing was written yet.
        return False
    try:
        # JPEG data is binary: text mode ('w') corrupts it on some platforms.
        with open(target, 'wb') as out:
            image.save(out, 'JPEG')
    except IOError:
        # Don't leave an empty or truncated file behind.
        if os.path.exists(target):
            os.remove(target)
        return False
    return True


def go(query, path):
    """Download full size images from Google image search.

    Don't print or republish images without permission.
    I used this to train a learning algorithm.

    Results are requested four at a time from the Google AJAX image search
    endpoint and every image that can be converted is stored as
    ``<path>/<query>/<title>.jpg``. Progress (the current ``start`` offset)
    and per-image failures are printed.

    NOTE: users of this script report that the endpoint now answers with
    ``"responseData": null`` and "This API is no longer available."; in
    that case the reported details are printed and the function returns
    instead of failing with a TypeError.

    Args:
        query: Search terms; also used as the name of the sub-directory.
        path: Directory in which the ``query`` sub-directory is created.
    """
    SEARCH_URL = 'https://ajax.googleapis.com/ajax/services/search/images'
    BASE_PATH = os.path.join(path, query)
    if not os.path.exists(BASE_PATH):
        os.makedirs(BASE_PATH)

    start = 0  # Google's start query string parameter for pagination.
    while start < 60:  # Google will only return a max of 56 results.
        # Let requests build the query string so `query` is URL-encoded.
        r = requests.get(SEARCH_URL,
                         params={'v': '1.0', 'q': query, 'start': start})
        payload = json.loads(r.text)
        response_data = payload.get('responseData')
        if not response_data:
            print('search failed: %s' % payload.get('responseDetails'))
            return

        for image_info in response_data['results']:
            url = image_info['unescapedUrl']
            try:
                image_r = requests.get(url)
            except requests.exceptions.RequestException:
                # Covers connection errors as well as malformed result URLs.
                print('could not download %s' % url)
                continue

            # Remove file-system path characters from name.
            title = image_info['titleNoFormatting']
            title = title.replace('/', '').replace('\\', '')
            # Format only the file name; a '%' in BASE_PATH must stay as is.
            target = os.path.join(BASE_PATH, '%s.jpg' % title)
            if not _save_as_jpeg(image_r.content, target):
                # Throw away some gifs...blegh.
                print('could not save %s' % url)

        print(start)
        start += 4  # 4 images per page.
        # Be nice to Google and they'll be nice back :)
        time.sleep(1.5)
# Example: fetch 'landscape' results into myDirectory/landscape.
go(query='landscape', path='myDirectory')
@hellosaumil
Copy link

line 24, in crawl_images
for image_info in json.loads(r.text)['responseData']['results']:
TypeError: 'NoneType' object has no attribute '__getitem__'

@guoshengkang
Copy link

TypeError: 'NoneType' object has no attribute '__getitem__'

@OleksandrMalinin
Copy link

Guys, look for another solution, as this API is no longer available. You can see that if you add "print r.text" before the for-loop:
{"responseData": null, "responseDetails": "This API is no longer available.", "responseStatus": 403}

@ajsunny
Copy link

ajsunny commented Feb 20, 2017

import random
import urllib.request

def web_image(url):
    """Download the image at ``url`` into the current working directory.

    The file is named ``<n>.jpg`` where ``n`` is a random integer in
    ``[1, 1000)``. Numbers whose file already exists are skipped so that an
    earlier download is never silently overwritten.

    Args:
        url: Address of the image to fetch.

    Returns:
        The name of the file that was written.

    Raises:
        FileExistsError: If all 999 candidate file names are already taken.
        Errors raised by ``urllib.request.urlretrieve`` are propagated
        unchanged.
    """
    import os

    # Visit every candidate number once, in random order.
    for name in random.sample(range(1, 1000), 999):
        fullname = str(name) + ".jpg"
        if not os.path.exists(fullname):
            break
    else:
        raise FileExistsError("no free <n>.jpg file name left in " + os.getcwd())

    urllib.request.urlretrieve(url, fullname)
    return fullname
web_image(url="https://www.tutorialspoint.com/python/images/python-mini.jpg")

Please help me!
There is an error in this code; please sort out this problem.
This program downloads an image from the web using Python.

Copy link

ghost commented Dec 25, 2018

def recherche_image_phrase(self, phrase):
    """Download up to five images from the Google Images results for ``phrase``.

    The result page is fetched, the ``src`` attribute of its ``<img>`` tags
    is read, and the first five addresses found are saved in the current
    working directory as ``image_<phrase><i>.jpg`` (``i`` starting at 0).

    Args:
        phrase: Search terms (intended use: common noun + adjective). The
            value is also stored on ``self.phrase``.

    Returns:
        The list of file names that were written, in download order.

    Raises:
        Errors from ``requests.get`` and ``urllib.request.urlretrieve`` are
        propagated; wrap the call in try/except to ignore a failed download.
    """
    from urllib.parse import quote_plus, urljoin

    self.phrase = phrase

    path = "https://www.google.co.in/search?q={0}&source=lnms&tbm=isch"
    # Encode the phrase so spaces and special characters survive in the URL.
    path1 = path.format(quote_plus(self.phrase))
    requete = requests.get(path1)
    soup = BeautifulSoup(requete.content, "html.parser")

    # Take each address from the tag's attribute instead of slicing the
    # printed HTML by character offsets; tags without a src are skipped.
    sources = [tag.get("src") for tag in soup.find_all("img")]
    sources = [src for src in sources if src]

    saved = []
    for i, src in enumerate(sources[:5]):
        # Resolve relative and protocol-relative addresses against the page.
        url = urljoin(path1, src)
        image = "image_" + self.phrase + str(i) + ".jpg"
        urllib.request.urlretrieve(url, image)
        saved.append(image)
    return saved
 #

Euh, ce n'est pas pour du téléchargement de masse, mais tu peux prendre AU MOINS la page d'images :D Il suffit de changer le range et, au pire, tu mets un try/except au #.

Copy link

ghost commented Dec 25, 2018

Bah, ça n'a pas tout mis :
en gros, pour les #, tu mets un try/except pour ignorer l'erreur !

@Markon101
Copy link

The API is no longer available unfortunately.

TypeError: 'NoneType' object is not subscriptable

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment