bendichter · April 27, 2022 21:36
diff --git a/download_figshare_collection.py b/download_figshare_collection.py
 import os
 import json
 import requests
 from tqdm import tqdm
 from urllib.request import urlretrieve

 BASE_URL = 'https://api.figshare.com/v2'

 def _get_json(url, params=None):
     """Send a GET request to ``url`` and return the decoded JSON body.

     Raises ``requests.HTTPError`` on an error status so that an API error
     payload is never mistaken for real data.
     """
     # Without a timeout a stalled connection would block the script forever.
     response = requests.get(url, params=params, timeout=60)
     response.raise_for_status()
     return response.json()


 def _safe_name(name):
     """Return ``name`` with path separators replaced by underscores."""
     # os.altsep is None on platforms that have a single separator.
     for separator in (os.sep, os.altsep):
         if separator:
             name = name.replace(separator, "_")
     return name


 def download_collection(collection_id, destination):
     """Download every article of a Figshare collection into ``destination``.

     One sub-directory is created per article, named after the article title.
     Each sub-directory receives a ``metadata.json`` file holding the article
     description under ``NWBFile.experiment_description`` and all of the
     article's data files. An existing metadata file is never overwritten,
     and a data file whose on-disk size already equals the size reported by
     the API is skipped, so an interrupted run can simply be restarted.

     Args:
         collection_id: Identifier of the Figshare collection.
         destination: Directory to download into; created (including
             parents) when it does not exist.

     Raises:
         requests.HTTPError: If an API request returns an error status.
     """
     os.makedirs(destination, exist_ok=True)

     # get all articles for collection, following pagination until a short
     # (or empty) page signals the end of the listing
     page_size = 1000
     article_records = []
     page = 1
     while True:
         batch = _get_json(
             f"{BASE_URL}/collections/{collection_id}/articles",
             params={"page": page, "page_size": page_size},
         )
         article_records.extend(batch)
         if len(batch) < page_size:
             break
         page += 1

     for article_record in tqdm(article_records, desc="articles"):

         # if article directory does not exist, create it; the title is
         # sanitized so a "/" in it cannot produce a nested or escaping path
         article_directory = os.path.join(
             destination, _safe_name(article_record["title"])
         )
         os.makedirs(article_directory, exist_ok=True)

         # get all metadata for that article
         article_metadata = _get_json(f'{BASE_URL}/articles/{article_record["id"]}')

         # write metadata file (kept as-is when it already exists)
         metadata_filepath = os.path.join(article_directory, "metadata.json")
         if not os.path.exists(metadata_filepath):
             with open(metadata_filepath, "w") as metadata_file:
                 json.dump(
                     dict(NWBFile=dict(experiment_description=article_metadata["description"])),
                     metadata_file,
                 )

         # download data files
         file_records = article_metadata["files"]
         for file_record in tqdm(file_records, desc=f"files in article {article_record['title']}"):
             filepath = os.path.join(article_directory, _safe_name(file_record['name']))
             # a size mismatch means a partial or stale file: download again
             if os.path.exists(filepath) and os.path.getsize(filepath) == file_record["size"]:
                 continue
             urlretrieve(file_record['download_url'], filepath)
            
            
 # Script entry point: fetch collection 5043830 into a fixed local directory.
 download_collection(
     collection_id=5043830,
     destination="/Users/bendichter/Downloads/Schiavo2020",
 )
	import os
	import json
	import requests
	from tqdm import tqdm
	from urllib.request import urlretrieve

	BASE_URL = 'https://api.figshare.com/v2'

	def _get_json(url, params=None):
	    """Send a GET request to ``url`` and return the decoded JSON body.

	    Raises ``requests.HTTPError`` on an error status so that an API error
	    payload is never mistaken for real data.
	    """
	    # Without a timeout a stalled connection would block the script forever.
	    response = requests.get(url, params=params, timeout=60)
	    response.raise_for_status()
	    return response.json()


	def _safe_name(name):
	    """Return ``name`` with path separators replaced by underscores."""
	    # os.altsep is None on platforms that have a single separator.
	    for separator in (os.sep, os.altsep):
	        if separator:
	            name = name.replace(separator, "_")
	    return name


	def download_collection(collection_id, destination):
	    """Download every article of a Figshare collection into ``destination``.

	    One sub-directory is created per article, named after the article title.
	    Each sub-directory receives a ``metadata.json`` file holding the article
	    description under ``NWBFile.experiment_description`` and all of the
	    article's data files. An existing metadata file is never overwritten,
	    and a data file whose on-disk size already equals the size reported by
	    the API is skipped, so an interrupted run can simply be restarted.

	    Args:
	        collection_id: Identifier of the Figshare collection.
	        destination: Directory to download into; created (including
	            parents) when it does not exist.

	    Raises:
	        requests.HTTPError: If an API request returns an error status.
	    """
	    os.makedirs(destination, exist_ok=True)

	    # get all articles for collection, following pagination until a short
	    # (or empty) page signals the end of the listing
	    page_size = 1000
	    article_records = []
	    page = 1
	    while True:
	        batch = _get_json(
	            f"{BASE_URL}/collections/{collection_id}/articles",
	            params={"page": page, "page_size": page_size},
	        )
	        article_records.extend(batch)
	        if len(batch) < page_size:
	            break
	        page += 1

	    for article_record in tqdm(article_records, desc="articles"):

	        # if article directory does not exist, create it; the title is
	        # sanitized so a "/" in it cannot produce a nested or escaping path
	        article_directory = os.path.join(
	            destination, _safe_name(article_record["title"])
	        )
	        os.makedirs(article_directory, exist_ok=True)

	        # get all metadata for that article
	        article_metadata = _get_json(f'{BASE_URL}/articles/{article_record["id"]}')

	        # write metadata file (kept as-is when it already exists)
	        metadata_filepath = os.path.join(article_directory, "metadata.json")
	        if not os.path.exists(metadata_filepath):
	            with open(metadata_filepath, "w") as metadata_file:
	                json.dump(
	                    dict(NWBFile=dict(experiment_description=article_metadata["description"])),
	                    metadata_file,
	                )

	        # download data files
	        file_records = article_metadata["files"]
	        for file_record in tqdm(file_records, desc=f"files in article {article_record['title']}"):
	            filepath = os.path.join(article_directory, _safe_name(file_record['name']))
	            # a size mismatch means a partial or stale file: download again
	            if os.path.exists(filepath) and os.path.getsize(filepath) == file_record["size"]:
	                continue
	            urlretrieve(file_record['download_url'], filepath)


	# Script entry point: fetch collection 5043830 into a fixed local directory.
	download_collection(
	    collection_id=5043830,
	    destination="/Users/bendichter/Downloads/Schiavo2020",
	)