Created
May 27, 2012 22:52
-
-
Save crizCraig/2816295 to your computer and use it in GitHub Desktop.
Download images from Google Image search using Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import os | |
import time | |
import requests | |
from PIL import Image | |
from StringIO import StringIO | |
from requests.exceptions import ConnectionError | |
def go(query, path):
    """Download full size images from Google image search.

    Don't print or republish images without permission.
    I used this to train a learning algorithm.

    Pages through the Google AJAX image-search endpoint four results at a
    time, fetches each result's ``unescapedUrl`` and re-encodes the picture
    as a JPEG named after the result title inside ``<path>/<query>/``.

    Args:
        query: Search phrase; also used as the name of the sub-directory
            created under ``path``.
        path: Directory under which the ``query`` sub-directory is created.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    # Local import: the downloaded payload is bytes, and ``io.BytesIO``
    # exists on both Python 2.6+ and Python 3.
    from io import BytesIO

    BASE_URL = 'https://ajax.googleapis.com/ajax/services/search/images'
    BASE_PATH = os.path.join(path, query)

    if not os.path.exists(BASE_PATH):
        os.makedirs(BASE_PATH)

    start = 0  # Google's start query string parameter for pagination.
    while start < 60:  # Google will only return a max of 56 results.
        # Let requests build the query string so the search phrase is
        # URL-encoded (spaces, '&', '%', non-ASCII characters, ...).
        r = requests.get(
            BASE_URL, params={'v': '1.0', 'q': query, 'start': start})

        try:
            payload = json.loads(r.text)
        except ValueError:
            print('unexpected (non-JSON) response for start=%d' % start)
            break

        # On errors the service answers with "responseData": null; stop
        # paging instead of failing with a TypeError on None.
        response_data = payload.get('responseData') if isinstance(payload, dict) else None
        if not response_data:
            print('no results returned for start=%d' % start)
            break

        for image_info in response_data.get('results') or []:
            url = image_info['unescapedUrl']
            try:
                image_r = requests.get(url)
            except ConnectionError:
                print('could not download %s' % url)
                continue

            # Decode before touching the disk so that an undecodable
            # download never creates or truncates a file.
            try:
                image = Image.open(BytesIO(image_r.content))
            except IOError:
                print('could not save %s' % url)
                continue

            # Remove file-system path characters from name.
            title = image_info['titleNoFormatting'].replace('/', '').replace('\\', '')
            image_path = os.path.join(BASE_PATH, title + '.jpg')

            try:
                # JPEG data is binary: the file must be opened with 'wb'.
                with open(image_path, 'wb') as image_file:
                    image.save(image_file, 'JPEG')
            except IOError:
                # Throw away some gifs...blegh.
                print('could not save %s' % url)
                # Do not leave an empty or partial .jpg behind.
                if os.path.exists(image_path):
                    os.remove(image_path)

        print(start)
        start += 4  # 4 images per page.

        # Be nice to Google and they'll be nice back :)
        time.sleep(1.5)
# Usage example: save "landscape" results under myDirectory/landscape.
go(query='landscape', path='myDirectory')
def recherche_image_phrase(self, phrase):
    """Download up to five images found on the Google Images page for ``phrase``.

    The phrase is stored on ``self.phrase``, the result page is fetched and
    parsed, and the first five ``<img>`` tags with an absolute http(s)
    ``src`` are saved in the current directory as
    ``image_<phrase><index>.jpg``.

    Args:
        phrase: Search text (the original author notes "common noun +
            adjective"). It is concatenated into the file names, so it is
            expected to be a ``str``.

    Returns:
        None.
    """
    # common noun + adjective
    self.phrase = phrase

    # Passing the query through ``params`` URL-encodes the phrase.
    requete = requests.get(
        "https://www.google.co.in/search",
        params={"q": self.phrase, "source": "lnms", "tbm": "isch"},
    )
    soup = BeautifulSoup(requete.content, "html.parser")

    # Read the ``src`` attribute from the parsed tags directly instead of
    # slicing the textual dump of the tag list. Tags without an absolute
    # http(s) source cannot be fetched by urlretrieve and are skipped.
    sources = (balise.get("src") for balise in soup.find_all("img"))
    urls = [
        src for src in sources
        if src and src.startswith(("http://", "https://"))
    ]

    for i, url in enumerate(urls[:5]):
        image = "image_" + self.phrase + str(i) + ".jpg"
        try:
            urllib.request.urlretrieve(url, image)
        except OSError:
            # urllib's URLError/HTTPError derive from OSError; one failed
            # image should not abort the remaining downloads.
            print("could not download %s" % url)
#
Euh, ce n'est pas pour du téléchargement de masse, mais tu peux prendre AU MOINS la page d'images :D Il suffit de changer le range et, au pire, tu mets un try/except au niveau des #.
bah ca a pas tout mis:
En gros, au niveau des #, tu mets un try/except pour ignorer l'erreur !
The API is no longer available unfortunately.
TypeError: 'NoneType' object is not subscriptable
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
import random
import urllib.request
def web_image(url):
    """Download ``url`` into the current directory under a random name.

    The file is called ``<n>.jpg`` where ``n`` is drawn from 1..999. If a
    file with the drawn name already exists, another name is drawn (up to
    a bounded number of attempts) so an earlier download is not silently
    overwritten.

    Args:
        url: Address of the image to fetch; anything accepted by
            ``urllib.request.urlretrieve``.

    Returns:
        The name of the file the image was written to.

    Raises:
        Whatever ``urllib.request.urlretrieve`` raises when the download
        fails (e.g. ``urllib.error.URLError``).
    """
    import os  # local import: only needed for the existence check

    fullname = str(random.randrange(1, 1000)) + ".jpg"
    attempts = 1
    # Bounded retry: if the directory is nearly full of numbered files we
    # fall back to the original behaviour (overwrite) instead of looping
    # forever.
    while os.path.exists(fullname) and attempts < 50:
        fullname = str(random.randrange(1, 1000)) + ".jpg"
        attempts += 1

    urllib.request.urlretrieve(url, fullname)
    return fullname
# Demo call: fetch a sample picture into the current working directory.
web_image(url="https://www.tutorialspoint.com/python/images/python-mini.jpg")
please help me!!!!!!!!!!!!!!!!!
There is an error in this code; please help me sort out this problem.
This program downloads an image from the web using Python.