-
-
Save crizCraig/2816295 to your computer and use it in GitHub Desktop.
import json | |
import os | |
import time | |
import requests | |
from PIL import Image | |
from StringIO import StringIO | |
from requests.exceptions import ConnectionError | |
def _safe_filename(title):
    """Return ``title`` reduced to characters that are valid in a file name.

    Removes path separators, the other characters Windows rejects
    (``< > : " | ? *``) and ASCII control characters, then trims leading and
    trailing dots/spaces. Falls back to ``'image'`` when nothing is left.
    """
    forbidden = '<>:"/\\|?*'
    kept = ''.join(
        ch for ch in title if ch not in forbidden and ord(ch) >= 32)
    return kept.strip(' .') or 'image'


def go(query, path):
    """Download full size images from Google image search.

    Don't print or republish images without permission.
    I used this to train a learning algorithm.

    Every result is re-encoded as JPEG and written to
    ``<path>/<query>/<result title>.jpg``; the directory is created when it
    does not exist. A result whose download or conversion fails is reported
    on stdout and skipped. Paging stops early, with a message, as soon as
    the service answers without ``responseData``.

    Args:
        query: Search terms; also used as the name of the output folder.
        path: Directory under which the ``query`` folder is created.

    Returns:
        None.
    """
    # Imported locally: io.BytesIO exists on Python 2.6+ and on Python 3,
    # and image payloads are bytes, not text.
    from io import BytesIO

    API_URL = 'https://ajax.googleapis.com/ajax/services/search/images'
    BASE_PATH = os.path.join(path, query)
    if not os.path.exists(BASE_PATH):
        os.makedirs(BASE_PATH)

    start = 0  # Google's start query string parameter for pagination.
    while start < 60:  # Google will only return a max of 56 results.
        # Let requests build the query string so that spaces, '&' or '%'
        # inside ``query`` are URL-encoded instead of corrupting the URL.
        r = requests.get(
            API_URL, params={'v': '1.0', 'q': query, 'start': start})
        payload = json.loads(r.text)
        response_data = payload.get('responseData')
        if not response_data:
            # The service sends "responseData": null when it refuses a
            # request; indexing into it would only raise a TypeError.
            print('search failed at start=%d: %s'
                  % (start, payload.get('responseDetails')))
            break
        for image_info in response_data['results']:
            url = image_info['unescapedUrl']
            try:
                image_r = requests.get(url)
            except ConnectionError:
                print('could not download %s' % url)
                continue
            # Convert in memory first so that a failed conversion does not
            # leave an empty or truncated .jpg on disk.
            jpeg = BytesIO()
            try:
                Image.open(BytesIO(image_r.content)).save(jpeg, 'JPEG')
            except IOError:
                # Throw away some gifs...blegh.
                print('could not save %s' % url)
                continue
            # Remove file-system path characters from name.
            file_name = _safe_filename(image_info['titleNoFormatting']) + '.jpg'
            # 'wb': JPEG data is binary; text mode corrupts it on platforms
            # that translate line endings.
            with open(os.path.join(BASE_PATH, file_name), 'wb') as out:
                out.write(jpeg.getvalue())
        print(start)
        start += 4  # 4 images per page.
        # Be nice to Google and they'll be nice back :)
        time.sleep(1.5)
# Example use: only runs when the file is executed as a script, so importing
# the module does not start a download.
if __name__ == '__main__':
    go('landscape', 'myDirectory')
The images that are downloaded are just stripes of colour. Is there a specific reason?
IOError: [Errno 22] invalid mode ('w') or filename: u'myDirectory\landscape\Na
ture - Photo gallery | MIRIADNA.COM.jpg'
@vinayshekhar000, @innermond, @fieldse, @crizCraig this is the error that showed up. Any help?
I'm pretty new to Python, so pardon my ignorance.
@senam1 Substitute the line title = image_info['titleNoFormatting'].replace('/', '').replace('\\', '')
with the following
title = image_info['titleNoFormatting']
title = "".join(x for x in title if x.isalnum())
@vinayshekhar000 To remove the stripes that you described, it is important to open the file in binary mode. Try replacing
file = open(os.path.join(BASE_PATH, '%s.jpg') % title, 'w')
with the following:
file = open(os.path.join(BASE_PATH, '%s.jpg') % title, 'wb')
Cheers!
How can one take >56 images?
Traceback (most recent call last):
File "gid.py", line 100, in <module>
soup = get_soup(url,header)
File "gid.py", line 89, in get_soup
return BeautifulSoup(urllib2.urlopen(urllib2.Request(url,headers=header)))
File "PYTHON_DIR/lib/urllib2.py", line 154, in urlopen
return opener.open(url, data, timeout)
File "PYTHON_DIR/lib/urllib2.py", line 437, in open
response = meth(req, response)
File "PYTHON_DIR/lib/urllib2.py", line 550, in http_response
'http', request, response, code, msg, hdrs)
File "PYTHON_DIR/lib/urllib2.py", line 475, in error
return self._call_chain(*args)
File "PYTHON_DIR/lib/urllib2.py", line 409, in _call_chain
result = func(*args)
File "PYTHON_DIR/lib/urllib2.py", line 558, in http_error_default
raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
urllib2.HTTPError: HTTP Error 404: Not Found
The service is no longer supported.
in go
for image_info in json.loads(r.text)['responseData']['results']:
TypeError: 'NoneType' object has no attribute '__getitem__'
{"responseData": null, "responseDetails": "This API is no longer available.", "responseStatus": 403}
bummer :(
line 24, in crawl_images
for image_info in json.loads(r.text)['responseData']['results']:
TypeError: 'NoneType' object has no attribute '__getitem__'
TypeError: 'NoneType' object has no attribute '__getitem__'
Guys, look for another solution, as this API is no longer available. You can see that if you add "print r.text" before the for-loop:
{"responseData": null, "responseDetails": "This API is no longer available.", "responseStatus": 403}
import random
import urllib.request
def web_image(url):
    """Download ``url`` into the current directory under a random name.

    The file name is a random integer from 1 to 999 followed by ``.jpg``;
    an existing file with the same name is overwritten.

    Args:
        url: Address of the image to fetch.

    Returns:
        The file name the image was saved as.
    """
    fullname = "{}.jpg".format(random.randrange(1, 1000))
    urllib.request.urlretrieve(url, fullname)
    # The name is random, so hand it back; otherwise the caller cannot tell
    # which file was just written.
    return fullname
# Example call: fetch a sample image; web_image() stores it in the current
# directory under a random "<number>.jpg" name.
web_image("https://www.tutorialspoint.com/python/images/python-mini.jpg")
Please help me!
There is an error in this code; please help me sort out this problem.
This program is meant to download an image from the web using Python.
def recherche_image_phrase(self, phrase):
    """Fetch a Google image-search page for ``phrase`` and try to save five images.

    The ``<img>`` tags of the results page are dumped to ``requete.py`` in
    the working directory and read back as text. Image URLs are then cut out
    of that text by searching for ``src`` and ``" width=``. Files are saved
    as ``image_<phrase><i>.jpg`` for ``i`` from 0 to 4.

    Args:
        phrase: Search text; stored on ``self.phrase`` and inserted into the
            query URL as-is (it is not URL-encoded here).
    """
    # common noun + adjective
    self.phrase = phrase
    liste = []
    path = "https://www.google.co.in/search?q={0}&source=lnms&tbm=isch"
    path1 = path.format(self.phrase)
    requete = requests.get(path1)
    page = requete.content
    soup = BeautifulSoup(page, "html.parser")
    propriete = soup.find_all("img")
    # Round-trip the tag list through a scratch file; the text read back
    # becomes the single element of ``liste``.
    with open("requete.py", "w") as file:
        file.write(str(propriete))
    with open("requete.py", "r") as file2:
        b = file2.read()
    liste.append(b)
    # Author's marker: a try/except can be wrapped around the loop below to
    # ignore download errors.
    for i in range(5):
        # NOTE(review): both offsets are searched in str(liste), i.e. the
        # repr of the whole list, but the slice is applied to liste[0].
        # Confirm that the +2 / -3 adjustments really yield a clean URL.
        a = str(liste).find(str("src"))
        b = str(liste).find(str('" width='))
        url = liste[0][a+2:b-3]
        image = str("image_"+self.phrase+str(i)+".jpg")
        # Keep only the text from offset b onward, minus its last three
        # characters -- presumably so the next pass finds the following tag.
        liste[0] = liste[0][b:-3]
        urllib.request.urlretrieve(str(url), image)
#
Euh, ce n'est pas pour du téléchargement de masse, mais tu peux prendre AU MOINS la page d'images :D Il suffit de changer le range et, au pire, tu mets un try/except au niveau des #.
bah ca a pas tout mis:
En gros, au niveau des #, tu mets un try/except pour ignorer l'erreur !
The API is no longer available unfortunately.
TypeError: 'NoneType' object is not subscriptable
@fieldse: try with python2 binary. I had no issues at all. Useful script.