Skip to content

Instantly share code, notes, and snippets.

@fzliu
Created June 9, 2016 04:49
Show Gist options
  • Save fzliu/ea998d5f8cebed6aa9606ee2091c4a64 to your computer and use it in GitHub Desktop.
Save fzliu/ea998d5f8cebed6aa9606ee2091c4a64 to your computer and use it in GitHub Desktop.
A helper script for downloading images from URLs.
"""
download_pixels.py: A handy utility function for downloading images from URLs.
"""
import os
import shutil
from urllib.parse import urlsplit

import requests
# number of GET attempts made for a URL before giving up
MAX_GET_TRIES = 10

# extensions the downloaded file's name is checked against
VALID_EXTENSIONS = [
    ".jpg",
    ".jpeg",
    ".png",
    ".JPG",
    ".JPEG",
    ".PNG",
]

# module-level requests session, sent with a desktop-browser User-Agent
session = requests.Session()
session.headers["User-Agent"] = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36"
)
def _filename_from_url(url):
    """
    Returns the last path segment of a url, ignoring any query string or fragment.
    """
    return urlsplit(url).path.rsplit("/", 1)[-1]


def download_photo(url, out_dir="pixel_data", timeout=30):
    """
    Downloads a photo from a url.

    The image is written to ``out_dir`` under the last path segment of the
    final (post-redirect) URL.

    Args:
        url: Address of the image to fetch.
        out_dir: Directory the image is written to; created if it is missing.
        timeout: Value passed to ``session.get`` as ``timeout`` (seconds);
            ``None`` disables the timeout.

    Returns:
        True if the image was written to disk. False if every GET attempt
        failed, the status code was not 200, the file extension is not in
        VALID_EXTENSIONS, or the transfer broke off part-way.
    """
    # get the image (up to MAX_GET_TRIES attempts)
    resp = None
    for _ in range(MAX_GET_TRIES):
        try:
            resp = session.get(url, stream=True, timeout=timeout)
            break
        except requests.RequestException:
            # request failed - try again; KeyboardInterrupt/SystemExit still propagate
            continue

    # return if we couldn't get the image
    if resp is None:
        return False

    try:
        # got a response, check its value
        if resp.status_code != 200:
            return False

        fname = _filename_from_url(resp.url)
        ext = os.path.splitext(fname)[-1]
        # some forwarding might happen - check that the file is still valid
        if ext.lower() not in VALID_EXTENSIONS:
            return False

        # an empty out_dir means "current directory"; makedirs("") would raise
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        path = os.path.join(out_dir, fname)

        try:
            with open(path, "wb") as f:
                # iter_content decodes gzip/deflate and reports transfer
                # problems as requests exceptions
                for chunk in resp.iter_content(chunk_size=64 * 1024):
                    f.write(chunk)
        except requests.RequestException:
            # the transfer broke off - don't leave a truncated image behind
            os.remove(path)
            return False
        return True
    finally:
        # with stream=True the connection stays checked out until closed
        resp.close()
if __name__ == "__main__":
    # placeholder entry point: parsing of an input data file would go here
    pass
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment