Skip to content

Instantly share code, notes, and snippets.

@joaoubaldo
Created August 5, 2023 07:51
Show Gist options
  • Save joaoubaldo/ce75db527bb9b865bd8cc464160be19d to your computer and use it in GitHub Desktop.
Save joaoubaldo/ce75db527bb9b865bd8cc464160be19d to your computer and use it in GitHub Desktop.
ClassDojo media downloader (202308)
# 1. From the browser, save each response from https://home.classdojo.com/api/storyFeed calls (browser's dev tools/network tab) as individual .json files in the same directory as this script
# 2. Run script
import requests
from multiprocessing.pool import ThreadPool
from urllib.parse import urlparse
import os
import json
def download_url(job):
    """Download one ClassDojo attachment to the current directory.

    Args:
        job: a ``(prefix, url)`` tuple. ``prefix`` (the post timestamp)
            is prepended to the file name so attachments from different
            posts cannot overwrite each other.

    Returns:
        The url that was downloaded (so the pool caller can log it).

    Raises:
        RuntimeError: if the server does not answer with HTTP 200 OK.
    """
    prefix, url = job
    print("downloading: ", url)
    # Name the local file after the last path segment of the URL;
    # urlparse strips any query string before basename is taken.
    parsed = urlparse(url)
    file_name = prefix + "-" + os.path.basename(parsed.path)
    # stream=True + iter_content keeps large videos out of memory;
    # the timeout stops a hung connection from stalling a worker thread,
    # and the with-block guarantees the connection is released.
    with requests.get(url, stream=True, timeout=60) as r:
        if r.status_code != requests.codes.ok:
            raise RuntimeError(f"Invalid response ({r.status_code}) for {url}")
        with open(file_name, 'wb') as f:
            for chunk in r.iter_content(chunk_size=65536):
                f.write(chunk)
    return url
if __name__ == '__main__':
    # Gather (timestamp, url) pairs from every saved storyFeed response
    # in the current directory.
    urls = []
    for fn in os.listdir("."):
        # endswith, not substring: '.json' in fn would also match
        # names like 'feed.json.bak'.
        if not fn.endswith(".json") or not os.path.isfile(os.path.join(".", fn)):
            continue
        # with-block closes the handle (the original leaked it).
        with open(fn, 'r') as fh:
            feed = json.load(fh)
        for item in feed['_items']:
            for attachment in item['contents']['attachments']:
                # The post timestamp becomes the file-name prefix in
                # download_url, keeping attachments from distinct posts apart.
                urls.append((item['time'], attachment['path']))
    # 5 worker threads; each picks up the next url as it finishes.
    # The with-block closes and joins the pool when iteration completes.
    with ThreadPool(5) as pool:
        for result in pool.imap_unordered(download_url, urls):
            print(result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment