Skip to content

Instantly share code, notes, and snippets.

@joaoubaldo
Created August 5, 2023 07:51
Show Gist options
  • Save joaoubaldo/ce75db527bb9b865bd8cc464160be19d to your computer and use it in GitHub Desktop.
Save joaoubaldo/ce75db527bb9b865bd8cc464160be19d to your computer and use it in GitHub Desktop.
ClassDojo media downloader (202308)
# 1. From the browser, save each response from https://home.classdojo.com/api/storyFeed calls (browser's dev tools/network tab) as individual .json files in the same directory as this script
# 2. Run script
import requests
from multiprocessing.pool import ThreadPool
from urllib.parse import urlparse
import os
import json
def download_url(job):
    """Download one ClassDojo attachment to the current directory.

    Args:
        job: a ``(prefix, url)`` tuple. ``prefix`` (the post timestamp)
            is prepended to the file name so attachments from different
            posts cannot overwrite each other.

    Returns:
        The url that was downloaded (so the pool caller can log it).

    Raises:
        RuntimeError: if the server does not answer with HTTP 200 OK.
    """
    prefix, url = job
    print("downloading: ", url)
    # Name the local file after the last path segment of the URL;
    # urlparse strips any query string before basename is taken.
    parsed = urlparse(url)
    file_name = prefix + "-" + os.path.basename(parsed.path)
    # stream=True + iter_content keeps large videos out of memory;
    # the timeout stops a hung connection from stalling a worker thread,
    # and the with-block guarantees the connection is released.
    with requests.get(url, stream=True, timeout=60) as r:
        if r.status_code != requests.codes.ok:
            raise RuntimeError(f"Invalid response ({r.status_code}) for {url}")
        with open(file_name, 'wb') as f:
            for chunk in r.iter_content(chunk_size=65536):
                f.write(chunk)
    return url
if __name__ == '__main__':
    # Gather (timestamp, url) pairs from every saved storyFeed response
    # in the current directory.
    urls = []
    for fn in os.listdir("."):
        # endswith, not substring: '.json' in fn would also match
        # names like 'feed.json.bak'.
        if not fn.endswith(".json") or not os.path.isfile(os.path.join(".", fn)):
            continue
        # with-block closes the handle (the original leaked it).
        with open(fn, 'r') as fh:
            feed = json.load(fh)
        for item in feed['_items']:
            for attachment in item['contents']['attachments']:
                # The post timestamp becomes the file-name prefix in
                # download_url, keeping attachments from distinct posts apart.
                urls.append((item['time'], attachment['path']))
    # 5 worker threads; each picks up the next url as it finishes.
    # The with-block closes and joins the pool when iteration completes.
    with ThreadPool(5) as pool:
        for result in pool.imap_unordered(download_url, urls):
            print(result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment