Created
January 11, 2018 18:42
-
-
Save yu-iskw/f2951854b1daecb24b7642d0d2abd8ec to your computer and use it in GitHub Desktop.
Fast image downloader with concurrent.futures
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import imghdr | |
import concurrent.futures | |
from urllib.parse import urlparse | |
from urllib.request import urlretrieve | |
def is_jpeg(path):
    """Return True if the file at ``path`` starts with a JPEG signature.

    Every JPEG stream begins with the start-of-image marker (``FF D8``)
    followed by the ``FF`` byte that introduces the next marker segment.
    Checking those three bytes directly accepts JPEGs whose first segment is
    not JFIF/Exif (files that ``imghdr.what`` can fail to recognise) and does
    not depend on ``imghdr``, which PEP 594 deprecated and Python 3.13 removed.

    Args:
        path: Filesystem path of the file to inspect.

    Returns:
        bool: True when the file begins with ``FF D8 FF``; False otherwise,
        including for empty or truncated files.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(path, 'rb') as image_file:
        # A short read (empty/truncated file) simply fails the comparison.
        return image_file.read(3) == b'\xff\xd8\xff'
def get_image(url, basepath):
    """Download ``url`` into ``basepath`` and return the path if it is a JPEG.

    The local file name is the last component of the URL's path. The
    destination directory is created when it does not exist yet, so a fresh
    checkout does not fail every download.

    Args:
        url: Address of the image to fetch.
        basepath: Directory the downloaded file is written to.

    Returns:
        The path of the downloaded file when ``is_jpeg`` accepts it, else
        ``None`` (blank URL, URL without a file name, download error, or a
        non-JPEG payload). A downloaded non-JPEG file is left on disk.
    """
    if not url:
        # Blank lines in a URL list are not worth a request or an error line.
        return None

    file_name = urlparse(url).path.split('/')[-1]
    if not file_name:
        # e.g. "http://host/dir/": joining '' would target the directory itself.
        print("%s has no file name in its path" % (url))
        return None

    jpeg_file_path = None
    try:
        if basepath:
            # exist_ok makes this safe when many workers race to create it.
            os.makedirs(basepath, exist_ok=True)
        target_path = os.path.join(basepath, file_name)
        downloaded_file_path, _ = urlretrieve(url, target_path)
        if is_jpeg(downloaded_file_path):
            # Emitted after the transfer has completed and the file was checked.
            print("%s is downloading" % (url))
            jpeg_file_path = downloaded_file_path
    except Exception as e:
        # Deliberately best-effort: one bad URL must not abort the batch, so
        # the error is reported and None is returned instead of raising.
        print(e)
    return jpeg_file_path
def _download_url_list(url_list_path, dest_dir, max_workers=100):
    """Fetch every URL listed in ``url_list_path`` into ``dest_dir``.

    The list file is read one URL per line; blank lines are skipped. Each
    URL is handed to ``get_image`` on a thread pool, and the function returns
    only after all submitted downloads have finished (leaving the executor's
    ``with`` block waits for them).

    Args:
        url_list_path: Path of the text/CSV file holding one URL per line.
        dest_dir: Directory passed to ``get_image`` as the download target.
        max_workers: Size of the thread pool.
    """
    # The file is opened in a ``with`` block so the handle is closed; the
    # executor is the inner context so it drains before the file is released.
    with open(url_list_path, 'r') as url_file:
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            for line in url_file:
                url = line.strip()
                if url:
                    executor.submit(get_image, url, dest_dir)


# Brands are processed one after another, each with its own pool.
_download_url_list('./adidas-sneaker-image-urls-200.csv', "./data/adidas/")
_download_url_list('./nike-sneaker-image-urls-200.csv', "./data/nike/")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment