Skip to content

Instantly share code, notes, and snippets.

@rom1504
Created June 12, 2021 14:51
Show Gist options
  • Save rom1504/02295a4612d102ae4637258bd0a9c35c to your computer and use it in GitHub Desktop.
Save rom1504/02295a4612d102ae4637258bd0a9c35c to your computer and use it in GitHub Desktop.
download_resize_when_necessary.py
import csv
import hashlib
import logging
import os
import urllib.request
from multiprocessing import Pool

import cv2
from tqdm import tqdm
# Images whose height or width exceeds this many pixels are shrunk to fit.
IMAGE_SIZE = 1280
# File extension used for every saved image.
IMAGE_FORMAT = 'jpg'
# Directory that receives the downloaded images.
IMAGE_DIR = 'images'

# exist_ok avoids the check-then-create race when several processes execute
# this module-level code at the same time.
os.makedirs(IMAGE_DIR, exist_ok=True)

# List of (url, destination filename) pairs, one per non-blank line of urls.txt.
images_to_dl = []
with open('urls.txt', encoding='utf-8') as file:
    lines = file.readlines()
for i, line in tqdm(enumerate(lines), total=len(lines)):
    # Drop the trailing newline (and any stray whitespace) kept by readlines().
    url = line.strip()
    if not url:
        # A blank line can never be downloaded. The index still advances, so
        # the file names of the remaining URLs stay tied to their line number.
        continue
    filename = f'{IMAGE_DIR}/{i}.{IMAGE_FORMAT}'
    images_to_dl.append((url, filename))
def resize_with_border(im, desired_size):
    """Shrink an oversized image into a ``desired_size`` square with white padding.

    Args:
        im: Image array whose ``shape`` starts with ``(height, width)``.
        desired_size: Maximum allowed side length, in pixels.

    Returns:
        ``im`` itself, untouched, when both sides already fit within
        ``desired_size``. Otherwise a new image: the input scaled down with
        its aspect ratio preserved, then centred on a
        ``desired_size`` x ``desired_size`` canvas filled with the border
        colour ``[255, 255, 255]``.
    """
    height, width = im.shape[:2]
    if height <= desired_size and width <= desired_size:
        return im

    ratio = float(desired_size) / max(height, width)
    # int() truncates, so the short side of a very elongated image could
    # become 0 pixels, which cv2.resize rejects; keep at least one pixel.
    new_height = max(1, int(height * ratio))
    new_width = max(1, int(width * ratio))

    # cv2.resize expects the target size as (width, height).
    im = cv2.resize(im, (new_width, new_height))

    # Split the leftover space evenly; any odd pixel goes to bottom/right.
    delta_w = desired_size - new_width
    delta_h = desired_size - new_height
    top, bottom = delta_h // 2, delta_h - (delta_h // 2)
    left, right = delta_w // 2, delta_w - (delta_w // 2)

    color = [255, 255, 255]
    return cv2.copyMakeBorder(
        im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color
    )
def process_image(row):
    """Download one image, shrink it if it is too large, and save it.

    Args:
        row: ``(url, filename)`` pair. ``filename`` is the destination path;
            if it already exists the call returns immediately without
            touching it.

    Returns:
        None. The function is best-effort: any failure (bad URL, network
        error, undecodable image, failed write) is logged at DEBUG level,
        the partially written file is removed, and no exception is raised.
    """
    url, filename = row
    if os.path.exists(filename):
        return
    try:
        request = urllib.request.Request(
            url,
            data=None,
            headers={'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:72.0) Gecko/20100101 Firefox/72.0'},
        )
        # Close the response deterministically instead of leaking the socket.
        with urllib.request.urlopen(request, timeout=10) as response:
            content = response.read()
        with open(filename, 'wb') as outfile:
            outfile.write(content)

        img = cv2.imread(filename, cv2.IMREAD_UNCHANGED)
        if img is None:
            # imread signals an undecodable file by returning None.
            raise ValueError(f'could not decode image downloaded from {url!r}')
        img = resize_with_border(img, IMAGE_SIZE)
        if not cv2.imwrite(filename, img):
            # A failed write would otherwise leave the raw download in place.
            raise OSError(f'could not write image to {filename!r}')
    except Exception:
        # Deliberately broad: one bad URL must not abort the whole batch.
        logging.getLogger(__name__).debug(
            'failed to process %r', url, exc_info=True
        )
        if os.path.exists(filename):
            os.remove(filename)
# Entry-point guard: with the "spawn" start method every worker re-imports
# this module, and an unguarded Pool creation would then run again inside
# each worker instead of only in the parent.
if __name__ == '__main__':
    # The context manager shuts the workers down once every result has been
    # consumed, instead of leaving the pool open until interpreter exit.
    with Pool(1024) as pool:
        # Results are discarded; the loop only drives the progress bar.
        for _ in tqdm(pool.imap_unordered(process_image, images_to_dl), total=len(images_to_dl)):
            pass
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment