Skip to content

Instantly share code, notes, and snippets.

@theycallmeloki
Created December 10, 2022 09:38
Show Gist options
  • Save theycallmeloki/f7df653528ec9a13d5e7e625c704cefc to your computer and use it in GitHub Desktop.
Save theycallmeloki/f7df653528ec9a13d5e7e625c704cefc to your computer and use it in GitHub Desktop.
A quick-and-dirty way to parallelize a task in Python. In this example, a thread pool downloads the images listed in a huge JSON file.
import json
import os
import uuid
from pathlib import Path
import requests
import threading
import concurrent.futures
def download(link, filelocation, timeout=30):
    """Stream the resource at ``link`` into the file ``filelocation``.

    Args:
        link: URL to fetch with an HTTP GET.
        filelocation: Path of the file to write. It is opened in binary
            write mode, so an existing file is overwritten.
        timeout: Seconds handed to ``requests.get`` as its timeout, so a
            stalled server cannot block a worker thread indefinitely.

    Returns:
        The string ``"written "`` followed by ``filelocation``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status.
        OSError: If ``filelocation`` cannot be opened or written.
    """
    print(link, filelocation)
    # The context manager releases the connection even if streaming stops
    # part-way through the body.
    with requests.get(link, stream=True, timeout=timeout) as r:
        # Check the status before opening the file so an HTTP error page is
        # never saved to disk as if it were the requested content.
        r.raise_for_status()
        with open(filelocation, 'wb') as f:
            for chunk in r.iter_content(1024):
                if chunk:
                    f.write(chunk)
    return "written " + filelocation
# Create the output directory up front; otherwise every worker would fail
# with FileNotFoundError when it tries to open its target file.
os.makedirs("files", exist_ok=True)

# Load the full list before starting the pool; the context manager closes the
# file handle instead of leaving it to the garbage collector.
with open('milady.json', encoding='utf-8') as source:
    entries = json.load(source)

with concurrent.futures.ThreadPoolExecutor() as executor:
    # Map each future back to its URL so failures can be reported usefully.
    futures = {}
    for i in entries:
        url = i["imageUrl"]
        # Take the real extension (".png", ".jpeg", ...) from the URL with its
        # query string removed, rather than assuming three characters.
        ft = os.path.splitext(url.split('?')[0])[1]
        fn = "files" + "/" + str(uuid.uuid4()) + ft
        futures[executor.submit(download, link=url, filelocation=fn)] = url
    for future in concurrent.futures.as_completed(futures):
        # One failed download must not abort reporting for all the others.
        try:
            print(future.result())
        except (requests.RequestException, OSError) as exc:
            print("failed " + futures[future] + ": " + str(exc))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment