Created
March 30, 2021 05:24
-
-
Save mkyt/503b4209efefbf67ce21b23288d77240 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""HCP Downloader

This script fetches preprocessed BOLD time series in grayordinate space
for all subjects from the Human Connectome Project S3 repository.
"""
import sys | |
import os | |
import os.path | |
import itertools | |
import boto3 | |
from botocore.exceptions import ClientError | |
# Bucket and key prefix under which every subject directory is looked up.
BUCKET_NAME = 'hcp-openaccess'
PREFIX = 'HCP_1200/'

# Module-wide S3 client shared by all helpers below.
# Replace the two placeholder strings with your own AWS credentials before
# running the script.
s3 = boto3.client(
    's3',
    aws_access_key_id='YOUR_ACCESS_KEY_ID',
    aws_secret_access_key='YOUR_SECRET_ACCESS_KEY',
)
def list_subjects():
    """Return the names of the "directories" found directly under PREFIX.

    The bucket is listed with ``Delimiter='/'`` so that S3 groups keys into
    common prefixes; each prefix has the leading PREFIX and the trailing
    ``'/'`` stripped, leaving only the subject identifier.

    Returns:
        list[str]: subject identifiers, in the order S3 returns them. Empty
        when nothing is found under PREFIX.
    """
    paginator = s3.get_paginator('list_objects_v2')
    pages = paginator.paginate(Bucket=BUCKET_NAME, Prefix=PREFIX, Delimiter='/')
    return [
        entry['Prefix'][len(PREFIX):-1]
        for page in pages
        # A page without any grouped prefix has no 'CommonPrefixes' key at
        # all, so fall back to an empty list instead of raising KeyError.
        for entry in page.get('CommonPrefixes', [])
    ]
def list_files(subj_id):
    """Return every object key stored under ``PREFIX + subj_id + '/'``.

    Args:
        subj_id: subject identifier, as returned by ``list_subjects``.

    Returns:
        list[str]: full S3 keys (including PREFIX and the subject id), in the
        order S3 returns them. Empty when the subject has no objects.
    """
    paginator = s3.get_paginator('list_objects_v2')
    keys = []
    for page in paginator.paginate(Bucket=BUCKET_NAME, Prefix=PREFIX + subj_id + '/'):
        # 'Contents' is missing from a page when no object matches the
        # prefix, so default to an empty list instead of raising KeyError.
        keys.extend(obj['Key'] for obj in page.get('Contents', []))
    return keys
def download_file(subj_id, key_path, out_path):
    """Download one object of a subject to ``out_path`` unless it already exists.

    The object key is ``PREFIX + subj_id + '/' + key_path`` in BUCKET_NAME.
    Data is first written to ``out_path + '.part'`` and only renamed to
    ``out_path`` after the transfer finished, so an interrupted or failed
    download never leaves a truncated file that a later run would mistake
    for a complete one.

    Args:
        subj_id: subject identifier (directory name under PREFIX).
        key_path: object key relative to the subject directory.
        out_path: local destination file; parent directories are created.

    Returns:
        None. S3 client errors are reported on stdout and the file is
        skipped; any other exception propagates after the partial file has
        been removed.
    """
    print(f'obtaining {key_path} for subject "{subj_id}"...')
    if os.path.exists(out_path):
        print('file already exists. skipping...')
        return

    # dirname() copes with root-level paths and with either separator on
    # Windows, unlike searching for os.sep by hand.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    tmp_path = out_path + '.part'
    try:
        with open(tmp_path, 'wb') as fp:
            s3.download_fileobj(BUCKET_NAME, PREFIX + subj_id + '/' + key_path, fp)
    except ClientError as err:
        code = err.response.get('Error', {}).get('Code', '')
        if code in ('404', 'NoSuchKey'):
            print('file not found. skipping...')
        else:
            # Do not mislabel e.g. permission problems as a missing file.
            print(f'download failed ({code}). skipping...')
    else:
        # Atomically publish the finished download under its final name.
        os.replace(tmp_path, out_path)
    finally:
        # Only present here when the download did not complete.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
def download_grayordinate_bold(subj_id, base_path):
    """Download the four ``rfMRI_REST{1,2}_{LR,RL}`` dtseries files of a subject.

    For every combination of run index ('1', '2') and 'LR'/'RL' label, the
    ``*_Atlas_MSMAll_hp2000_clean.dtseries.nii`` file is fetched via
    ``download_file`` and stored as
    ``<base_path>/<subj_id>/rfMRI_<run>_<label>.dtseries.nii``.

    Args:
        subj_id: subject identifier (directory name under PREFIX).
        base_path: local directory under which the per-subject directory is
            created.
    """
    for sess, lr in itertools.product(('1', '2'), ('LR', 'RL')):
        run_name = f'rfMRI_REST{sess}_{lr}'
        path = f'MNINonLinear/Results/{run_name}/{run_name}_Atlas_MSMAll_hp2000_clean.dtseries.nii'
        # os.path.join avoids doubled separators when base_path already ends
        # with one, and does not turn an empty base_path into a root path.
        out_path = os.path.join(base_path, subj_id, f'rfMRI_{sess}_{lr}.dtseries.nii')
        download_file(subj_id, path, out_path)
if __name__ == '__main__':
    # The optional first command-line argument selects the output directory;
    # without it, files are written below the current directory.
    base_path = sys.argv[1] if len(sys.argv) > 1 else '.'
    # Fetch the files of every subject listed in the bucket, one at a time.
    for subj in list_subjects():
        download_grayordinate_bold(subj, base_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment