Skip to content

Instantly share code, notes, and snippets.

@yu-iskw
Created January 11, 2018 18:42
Show Gist options
  • Save yu-iskw/f2951854b1daecb24b7642d0d2abd8ec to your computer and use it in GitHub Desktop.
Save yu-iskw/f2951854b1daecb24b7642d0d2abd8ec to your computer and use it in GitHub Desktop.
Fast image downloader with concurrent.futures
import os
import imghdr
import concurrent.futures
from urllib.parse import urlparse
from urllib.request import urlretrieve
def is_jpeg(path):
    """Return True if the file at *path* begins with the JPEG signature.

    The check reads the first three bytes and compares them with the JPEG
    start-of-image marker followed by the first segment marker byte
    (``FF D8 FF``). This avoids relying on ``imghdr.what``, which is
    deprecated and only recognises JPEGs carrying a ``JFIF``/``Exif`` tag
    at a fixed offset, so JPEGs that start with another segment were
    reported as non-JPEG.

    Args:
        path: Path of the file to inspect.

    Returns:
        True when the file starts with ``FF D8 FF``, otherwise False
        (including for empty or very short files).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(path, 'rb') as image_file:
        return image_file.read(3) == b'\xff\xd8\xff'
def get_image(url, basepath):
    """Download *url* into *basepath* and return the saved path if it is a JPEG.

    The local file name is the last ``/``-separated component of the URL
    path. The download is best-effort: any error raised while creating the
    directory, downloading, or inspecting the file is printed and results
    in ``None`` so that one bad URL does not abort a batch of downloads.

    Args:
        url: Address of the image to fetch.
        basepath: Directory the file is saved into; created if missing.

    Returns:
        The path of the downloaded file when ``is_jpeg`` accepts it,
        otherwise ``None`` (no file name in the URL, download error, or
        the file is not a JPEG).
    """
    file_name = urlparse(url).path.split('/')[-1]
    if not file_name:
        # Without a file name the target would be the directory itself,
        # so there is nothing sensible to download to.
        print("%s has no file name in its path; skipped" % (url))
        return None

    jpeg_file_path = None
    try:
        # urlretrieve does not create missing directories; exist_ok keeps
        # this safe when several workers reach this line at once.
        os.makedirs(basepath, exist_ok=True)
        target_path = os.path.join(basepath, file_name)
        downloaded_file_path, _headers = urlretrieve(url, target_path)
        if is_jpeg(downloaded_file_path):
            print("%s is downloading" % (url))
            jpeg_file_path = downloaded_file_path
    except Exception as e:
        # Deliberate catch-all: this function is run as a worker task and
        # a failure for one URL is reported, not propagated.
        print(e)
    return jpeg_file_path
# Download every URL listed in the adidas CSV (one URL per line) using a
# pool of worker threads; leaving the executor's `with` block waits for all
# submitted downloads to finish.
with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor:
    url_list_path = './adidas-sneaker-image-urls-200.csv'
    # Open the list in a context manager so the handle is closed even if
    # reading fails part-way through.
    with open(url_list_path, 'r') as url_file:
        for line in url_file:
            url = line.strip()
            if url:  # ignore blank lines instead of submitting an empty URL
                executor.submit(get_image, url, "./data/adidas/")
# Download every URL listed in the nike CSV (one URL per line) using a
# pool of worker threads; leaving the executor's `with` block waits for all
# submitted downloads to finish.
with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor:
    url_list_path = './nike-sneaker-image-urls-200.csv'
    # Open the list in a context manager so the handle is closed even if
    # reading fails part-way through.
    with open(url_list_path, 'r') as url_file:
        for line in url_file:
            url = line.strip()
            if url:  # ignore blank lines instead of submitting an empty URL
                executor.submit(get_image, url, "./data/nike/")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment