Forked from DollarAkshay/multithreaded_image_scrapper.py
Created
April 25, 2018 14:20
-
-
Save malefs/69bc54a1791d4d85bdeddca8859d3ff2 to your computer and use it in GitHub Desktop.
Multithreaded Image Scraper in Python (Insanely fast on Ryzen 7)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import cv2 | |
| import numpy as np | |
| import urllib.request | |
| import time | |
| import threading | |
| import math | |
def getPokemon(start, end):
    """Download and save the Pokedex artwork for every id in ``[start, end)``.

    Each image is fetched from assets.pokemon.com, decoded with OpenCV and
    written to ``images/<id>.png``. The remote file name is zero-padded to
    three digits while the local file name is zero-padded to four.

    The work is best effort: a failure for one id is reported on stdout and
    the loop carries on with the next id.

    Args:
        start: First id to fetch (inclusive).
        end: Id at which to stop (exclusive). Nothing is fetched when
            ``end <= start``.

    Returns:
        None.
    """
    # Local import: the file's import header does not bring in ``os``.
    import os

    print("Started worker for range :", start, "to", end)

    # cv2.imwrite does not create missing directories; it only returns False.
    # exist_ok=True keeps this safe when several workers get here at once.
    os.makedirs("images", exist_ok=True)

    for i in range(start, end):
        file_name = '{:04d}'.format(i) + '.png'
        try:
            url = 'https://assets.pokemon.com/assets/cms2/img/pokedex/detail/' + \
                '{:03d}'.format(i) + '.png'
            request = urllib.request.Request(url)
            # The context manager closes the connection even when read() fails.
            # The timeout stops a stalled server from blocking this worker
            # (and whoever joins it) indefinitely.
            with urllib.request.urlopen(request, timeout=30) as response:
                binary_str = response.read()

            numpy_array = np.frombuffer(binary_str, dtype="uint8")
            image = cv2.imdecode(numpy_array, cv2.IMREAD_UNCHANGED)
            if image is None:
                raise ValueError("could not decode image data from " + url)

            # imwrite signals failure through its return value, not an
            # exception, so check it before claiming the file was saved.
            if not cv2.imwrite("images/" + file_name, image):
                raise OSError("could not write images/" + file_name)
            print("Saved " + file_name)
        except Exception as e:
            # Deliberately broad: one bad id must not stop the whole range.
            print("Failed " + file_name + " : " + str(e))
# Wall-clock start, used for the elapsed-time report printed at the end.
start_time = time.time()

thread_count = 16
image_count = 801

# Boundaries of thread_count contiguous half-open id ranges that together
# cover ids 1..image_count. Neighbouring ranges share a boundary, so no id is
# skipped or fetched twice.
range_bounds = [
    math.floor(k * image_count / thread_count) + 1
    for k in range(thread_count + 1)
]

# One worker thread per [low, high) range.
thread_list = [
    threading.Thread(target=getPokemon, args=(low, high))
    for low, high in zip(range_bounds, range_bounds[1:])
]

# Start every worker before waiting on any of them so the downloads overlap.
for worker in thread_list:
    worker.start()
for worker in thread_list:
    worker.join()

end_time = time.time()
print("Done")
print("Time taken : {}sec".format(end_time - start_time))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment