fzliu · June 9, 2016 04:49
diff --git a/download_pixels.py b/download_pixels.py
 """
 download_pixels.py: A handy utility function for downloading images from URLs.
 """

 import os
 import shutil

 import requests


 # number of GET attempts made for a URL before the download is abandoned
 MAX_GET_TRIES = 10

 # file suffixes accepted as images (all-lower and all-upper spellings only)
 VALID_EXTENSIONS = [
     ".jpg", ".jpeg", ".png",
     ".JPG", ".JPEG", ".PNG",
 ]


 # shared requests session; everything sent through it carries a desktop
 # Chrome User-Agent header
 session = requests.Session()
 session.headers.update({
     "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36",
 })


 def download_photo(url, out_dir="pixel_data"):
    """
        Downloads a photo from a url.

        The request is sent through the module-level `session` and is retried
        up to MAX_GET_TRIES times when it fails with a requests exception.
        The image is saved in `out_dir` under the last "/"-separated component
        of the response URL, provided the server answered 200 and that name
        ends in one of VALID_EXTENSIONS.

        Args:
            url: URL of the image to download.
            out_dir: directory the image is written to; it is not created
                here, so it must already exist.

        Returns:
            True if the image was written to disk, False otherwise.
    """

    # get the image (10 tries); only request-level failures are retried, so
    # KeyboardInterrupt and programming errors are no longer swallowed
    resp = None
    for _ in range(MAX_GET_TRIES):
        try:
            resp = session.get(url, stream=True)
            break
        except requests.exceptions.RequestException:
            continue

    # return if we couldn't get the image
    if resp is None:
        return False

    # a streamed response keeps its connection checked out until it is
    # closed, so release it on every exit path
    try:
        # got a response, check its value
        if resp.status_code != 200:
            return False

        # some forwarding might happen - check that the file is still valid
        fname = resp.url.split("/")[-1]
        ext = os.path.splitext(fname)[-1]
        if ext not in VALID_EXTENSIONS:
            return False

        path = os.path.join(out_dir, fname)
        with open(path, "wb") as f:
            # ask urllib3 to undo any gzip/deflate content encoding
            resp.raw.decode_content = True
            shutil.copyfileobj(resp.raw, f)
        return True
    finally:
        resp.close()


 # script entry point; the body only runs when the file is executed directly
 if __name__ == "__main__":

    # TODO: parse some input data file here (presumably a list of image URLs
    # to hand to download_photo - confirm); until then this is a no-op
    pass
	"""
	download_pixels.py: A handy utility function for downloading images from URLs.
	"""

	import os
	import shutil

	import requests


	# number of GET attempts made for a URL before the download is abandoned
	MAX_GET_TRIES = 10

	# file suffixes accepted as images (all-lower and all-upper spellings only)
	VALID_EXTENSIONS = [
		".jpg", ".jpeg", ".png",
		".JPG", ".JPEG", ".PNG",
	]


	# shared requests session; everything sent through it carries a desktop
	# Chrome User-Agent header
	session = requests.Session()
	session.headers.update({
		"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36",
	})


	def download_photo(url, out_dir="pixel_data"):
		"""
		Downloads a photo from a url.

		The request is sent through the module-level `session` and is retried
		up to MAX_GET_TRIES times when it fails with a requests exception.
		The image is saved in `out_dir` under the last "/"-separated component
		of the response URL, provided the server answered 200 and that name
		ends in one of VALID_EXTENSIONS.

		Args:
			url: URL of the image to download.
			out_dir: directory the image is written to; it is not created
				here, so it must already exist.

		Returns:
			True if the image was written to disk, False otherwise.
		"""

		# get the image (10 tries); only request-level failures are retried, so
		# KeyboardInterrupt and programming errors are no longer swallowed
		resp = None
		for _ in range(MAX_GET_TRIES):
			try:
				resp = session.get(url, stream=True)
				break
			except requests.exceptions.RequestException:
				continue

		# return if we couldn't get the image
		if resp is None:
			return False

		# a streamed response keeps its connection checked out until it is
		# closed, so release it on every exit path
		try:
			# got a response, check its value
			if resp.status_code != 200:
				return False

			# some forwarding might happen - check that the file is still valid
			fname = resp.url.split("/")[-1]
			ext = os.path.splitext(fname)[-1]
			if ext not in VALID_EXTENSIONS:
				return False

			path = os.path.join(out_dir, fname)
			with open(path, "wb") as f:
				# ask urllib3 to undo any gzip/deflate content encoding
				resp.raw.decode_content = True
				shutil.copyfileobj(resp.raw, f)
			return True
		finally:
			resp.close()


	# script entry point; the body only runs when the file is executed directly
	if __name__ == "__main__":

		# TODO: parse some input data file here (presumably a list of image URLs
		# to hand to download_photo - confirm); until then this is a no-op
		pass