Created
August 5, 2023 07:51
-
-
Save joaoubaldo/ce75db527bb9b865bd8cc464160be19d to your computer and use it in GitHub Desktop.
ClassDojo media downloader (202308)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 1. From the browser's dev tools (Network tab), save each response from https://home.classdojo.com/api/storyFeed calls as an individual .json file in the same directory as this script.
# 2. Run the script.
import requests | |
from multiprocessing.pool import ThreadPool | |
from urllib.parse import urlparse | |
import os | |
import json | |
def download_url(input):
    """Download one URL and save it as a file in the current directory.

    The saved file is named ``<prefix>-<basename>``, where ``<basename>`` is
    the last segment of the URL's path (query string and fragment are
    ignored), e.g. ``abc/xyz/file.txt?x=1`` with prefix ``p`` is written to
    ``p-file.txt``.

    Args:
        input: A ``(prefix, url)`` pair. A single tuple is taken (rather
            than two parameters) so the function can be handed directly to
            ``ThreadPool.imap_unordered``.

    Returns:
        The URL that was downloaded.

    Raises:
        RuntimeError: If the response status is not ``requests.codes.ok``.
    """
    prefix, url = input
    print("downloading: ", url)

    file_name = f"{prefix}-{os.path.basename(urlparse(url).path)}"

    # The context manager releases the streamed connection even on error;
    # the timeout keeps a stalled server from blocking a worker forever.
    with requests.get(url, stream=True, timeout=60) as r:
        if r.status_code != requests.codes.ok:
            raise RuntimeError("Invalid response")
        with open(file_name, 'wb') as f:
            # Iterating the response directly yields 128-byte chunks;
            # request larger ones to cut down on tiny writes.
            for data in r.iter_content(chunk_size=64 * 1024):
                f.write(data)
    return url
if __name__ == '__main__':
    # Every regular file in the current directory whose name ends in .json
    # is treated as a saved storyFeed response.
    files = [
        f for f in os.listdir(".")
        if f.endswith('.json') and os.path.isfile(os.path.join(".", f))
    ]

    # Build (prefix, url) work items: one per attachment, prefixed with the
    # 'time' value of the item the attachment belongs to.
    urls = []
    for fn in files:
        with open(fn, 'r', encoding='utf-8') as c:
            x = json.load(c)
        for i in x['_items']:
            for a in i['contents']['attachments']:
                urls.append((i['time'], a['path']))

    # Run 5 worker threads. Each call takes the next element of the urls
    # list; results are printed in completion order. Iterating inside the
    # ``with`` block ensures all downloads finish before the pool is shut down.
    with ThreadPool(5) as pool:
        for r in pool.imap_unordered(download_url, urls):
            print(r)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment