|
import os |
|
import json |
|
import requests |
|
from tqdm import tqdm |
|
from urllib.request import urlretrieve |
|
|
|
# Root of the figshare REST API (v2); endpoint paths are appended to this prefix.
BASE_URL = "https://api.figshare.com/v2"
|
|
|
def _get_json(url, timeout):
    """Fetch *url* and return its decoded JSON body, raising on HTTP errors."""
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of treating an error payload as data.
    response.raise_for_status()
    return response.json()


def _safe_name(name):
    """Return *name* with path separators replaced so it stays one path component."""
    for separator in (os.sep, os.altsep):
        if separator:
            name = name.replace(separator, "_")
    return name


def download_collection(collection_id, destination, *, timeout=60):
    """Download every article of a figshare collection into *destination*.

    One sub-directory is created per article (named after the article title).
    Each article directory receives a ``metadata.json`` file holding the
    article description under ``NWBFile.experiment_description`` and all data
    files listed in the article metadata. An existing ``metadata.json`` is
    left untouched, and a data file is skipped when a local file of the
    expected size is already present, so the function can be re-run to resume
    an interrupted download.

    Args:
        collection_id: Identifier of the figshare collection.
        destination: Directory to download into; created (with parents) if
            it does not exist.
        timeout: Seconds to wait for each figshare API request. It does not
            apply to the data-file downloads, which go through ``urlretrieve``.

    Raises:
        requests.HTTPError: If an API request returns an error status.
    """
    os.makedirs(destination, exist_ok=True)

    # get all articles for collection, one page at a time so that collections
    # larger than a single page are not silently truncated
    page_size = 1000
    page = 1
    article_records = []
    while True:
        page_records = _get_json(
            BASE_URL + f'/collections/{collection_id}/articles?page={page}&page_size={page_size}',
            timeout,
        )
        article_records.extend(page_records)
        # A short (or empty) page means there is nothing left to fetch.
        if len(page_records) < page_size:
            break
        page += 1

    for article_record in tqdm(article_records, desc="articles"):

        # if article directory does not exist, create it
        article_directory = os.path.join(destination, _safe_name(article_record["title"]))
        os.makedirs(article_directory, exist_ok=True)

        # get all metadata for that article
        article_metadata = _get_json(BASE_URL + f'/articles/{article_record["id"]}', timeout)

        # write metadata file
        metadata_filepath = os.path.join(article_directory, "metadata.json")
        if not os.path.exists(metadata_filepath):
            with open(metadata_filepath, "w", encoding="utf-8") as metadata_file:
                json.dump(
                    dict(NWBFile=dict(experiment_description=article_metadata["description"])),
                    metadata_file,
                )

        # download data files
        file_records = article_metadata["files"]
        for file_record in tqdm(file_records, desc=f"files in article {article_record['title']}"):
            filepath = os.path.join(article_directory, _safe_name(file_record['name']))
            if os.path.exists(filepath) and os.path.getsize(filepath) == file_record["size"]:
                continue
            # Download next to the target and move into place afterwards, so an
            # interrupted transfer never leaves a truncated file at `filepath`.
            partial_filepath = filepath + ".part"
            urlretrieve(file_record['download_url'], partial_filepath)
            os.replace(partial_filepath, filepath)
|
|
|
|
|
# Script invocation: fetch figshare collection 5043830 into a local downloads folder.
download_collection(
    collection_id=5043830,
    destination="/Users/bendichter/Downloads/Schiavo2020",
)