Created
          March 23, 2020 02:47 
        
      - 
      
- 
        Save oesteban/51e0302292d9a1d3090a2807c100a698 to your computer and use it in GitHub Desktop. 
    Sample OpenfMRI (scraps from *fMRIPrep*)
  
        
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # -*- coding: utf-8 -*- | |
| # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- | |
| # vi: set ft=python sts=4 ts=4 sw=4 et: | |
| """ | |
| A tool to sample OpenfMRI datasets using a datalad installation | |
| Please run this first: :: | |
| # install openfmri dataset | |
| datalad install -r ///openfmri | |
| # get all sidecar files | |
| datalad get -J8 $( find openfmri/ -name "*_T1w.json" ) | |
| datalad get -J8 $( find openfmri/ -name "*_T2w.json" ) | |
| datalad get -J8 $( find openfmri/ -name "*_bold.json" ) | |
| datalad get -J8 $( find openfmri/ -name "*_magnitude*.json" ) | |
| datalad get -J8 $( find openfmri/ -name "*_phase*.json" ) | |
| datalad get -J8 $( find openfmri/ -name "*_fieldmap.json" ) | |
| # list subjects | |
| cd openfmri | |
| """ | |
| import os | |
| import glob | |
| import numpy as np | |
def get_parser():
    """Build the command-line parser of the OpenfMRI participants sampler.

    Returns
    -------
    argparse.ArgumentParser
        Parser with one positional argument (``openfmri_dir``) and the
        optional sampling/fetching switches defined below.
    """
    from argparse import ArgumentParser
    from argparse import RawTextHelpFormatter

    parser = ArgumentParser(
        description='OpenfMRI participants sampler, for FMRIPREP\'s testing purposes',
        formatter_class=RawTextHelpFormatter)

    parser.add_argument('openfmri_dir', action='store',
                        help='the root folder of the openfmri dataset')

    # optional arguments
    # ``--datalad_fetch`` is the historical spelling and is kept for backward
    # compatibility; the hyphenated alias matches the other long options.
    parser.add_argument('-D', '--datalad_fetch', '--datalad-fetch',
                        dest='datalad_fetch', action='store_true', default=False,
                        help='download sampled subjects')
    parser.add_argument('-o', '--output-file', action='store', help='write output file')
    parser.add_argument('-n', '--num-participants', action='store', type=int, default=4,
                        help='number of participants randomly selected per dataset')
    parser.add_argument('--njobs', action='store', type=int, help='parallel downloads')
    parser.add_argument('--seed', action='store', type=int, default=20170914,
                        help='seed for random number generation')
    return parser
def _find_participants(openfmri_dir):
    """Scan *openfmri_dir* for participants having both ``anat`` and ``func`` data.

    Returns a tuple ``(datasets, multises)`` where ``datasets`` maps each
    dataset folder name (``ds*``) to the list of its eligible ``sub-*``
    folder names, and ``multises`` is the set of dataset names in which at
    least one participant stores its data under ``ses-*`` folders.
    """
    datasets = {}
    multises = set()

    thispath = os.getcwd()
    os.chdir(openfmri_dir)
    try:
        for subj in sorted(glob.glob(os.path.join('ds*', 'sub-*'))):
            ds, sub = os.path.split(subj)
            if (os.path.isdir(os.path.join(subj, 'anat')) and
                    os.path.isdir(os.path.join(subj, 'func'))):
                datasets.setdefault(ds, []).append(sub)
            elif (glob.glob(os.path.join(subj, 'ses-*', 'anat')) and
                    glob.glob(os.path.join(subj, 'ses-*', 'func'))):
                multises.add(ds)
                datasets.setdefault(ds, []).append(sub)
    finally:
        # Always restore the caller's working directory, even on failure
        os.chdir(thispath)
    return datasets, multises


def _draw_subsample(datasets, num_participants):
    """Pick at most *num_participants* participants from every dataset.

    Datasets with no more than *num_participants* participants are kept
    whole; larger ones are sampled without replacement using numpy's global
    random state (seed it beforehand for reproducibility).
    """
    subsample = {}
    # Visit datasets in sorted order so that a given seed always yields the
    # same draw, regardless of dictionary ordering.
    for ds in sorted(datasets):
        sublist = datasets[ds]
        if len(sublist) <= num_participants:
            subsample[ds] = list(sublist)
        else:
            subsample[ds] = sorted(np.random.choice(
                sublist, size=num_participants, replace=False).tolist())
    return subsample


def main():
    """Entry point: sample participants and optionally store/fetch them.

    Parses the command line, discovers eligible participants below the
    OpenfMRI root folder, randomly draws up to ``--num-participants`` of them
    per dataset, prints a summary, optionally dumps the selection to a YAML
    file (``--output-file``) and optionally downloads the selected
    participants with datalad (``--datalad_fetch``).
    """
    opts = get_parser().parse_args()

    np.random.seed(opts.seed)

    out_file = None
    if opts.output_file is not None:
        out_file = os.path.abspath(opts.output_file)

    datasets, multises = _find_participants(opts.openfmri_dir)
    subsample = _draw_subsample(datasets, opts.num_participants)

    # Count what was actually selected. The previous consistency check
    # compared this number against the *whole* population and therefore
    # aborted as soon as any dataset was larger than the requested sample.
    n_sample = sum(len(sublist) for sublist in subsample.values())

    if out_file is not None:
        import yaml
        with open(out_file, 'w') as outfh:
            outfh.write(yaml.dump(subsample))
        print('Sampled participants stored to %s' % out_file)

    singleses = set(datasets.keys()) - multises
    print('Sampled %d participants' % n_sample)
    print('Datasets summary:\n\tSingle-session=%d'
          '\n\tMulti-session=%d'
          '\n\tTotal participants=%d' % (len(singleses), len(multises), n_sample))

    if opts.datalad_fetch:
        import datalad.api as dlad
        for ds, sublist in subsample.items():
            for sub in sublist:
                dlad.get(path=os.path.join(opts.openfmri_dir, ds, sub),
                         recursive=True, jobs=opts.njobs, verbose=True)


if __name__ == '__main__':
    main()
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # -*- coding: utf-8 -*- | |
| # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- | |
| # vi: set ft=python sts=4 ts=4 sw=4 et: | |
| """ | |
| A tool to generate a tasks_list.sh file for running fmriprep | |
| on subjects downloaded with datalad with sample_openfmri.py | |
| """ | |
| import os | |
| import glob | |
# Template of one fmriprep invocation; the placeholders are filled in once per
# sampled participant. The template intentionally ends with a single space.
CMDLINE = (
    '{fmriprep_cmd} {bids_dir}/{dataset_dir} {output_dir}/{dataset_dir} participant '
    '-w {dataset_dir}/work --participant_label {participant_label} '
)
def get_parser():
    """Build the command-line parser of the fmriprep tasks-list generator.

    Returns
    -------
    argparse.ArgumentParser
        Parser with three positional arguments (``openfmri_dir``,
        ``output_dir``, ``sample_file``) and the optional switches that are
        forwarded to the generated fmriprep command lines.
    """
    from argparse import ArgumentParser
    from argparse import RawTextHelpFormatter

    parser = ArgumentParser(
        description='Generate a tasks list file to run FMRIPREP on sampled '
                    'OpenfMRI participants',
        formatter_class=RawTextHelpFormatter)

    parser.add_argument('openfmri_dir', action='store',
                        help='the root folder of the openfmri dataset')
    parser.add_argument('output_dir', action='store',
                        help='the directory where outputs should be stored')
    parser.add_argument('sample_file', action='store',
                        help='a YAML file containing the subsample schedule')

    # optional arguments
    parser.add_argument('--anat-only', action='store_true', default=False,
                        help='run only anatomical workflow')
    parser.add_argument('--nthreads', action='store', type=int,
                        help='number of total threads')
    # ``--omp_nthreads`` is the historical spelling and is kept for backward
    # compatibility; the hyphenated alias matches the other long options.
    parser.add_argument('--omp_nthreads', '--omp-nthreads', dest='omp_nthreads',
                        action='store', type=int,
                        help='number of threads for OMP-based interfaces')
    parser.add_argument('--mem-gb', action='store', type=int,
                        help='available memory in GB')
    parser.add_argument('--tasks-list-file', default='tasks_list.sh',
                        action='store', help='write output file')
    parser.add_argument('-t', '--tasks-filter', action='store', nargs='*',
                        help='run only specific tasks')
    parser.add_argument('--cmd-call', action='store', help='command to be run')
    return parser
def main():
    """Entry point: write one fmriprep command line per sampled participant.

    Reads the YAML sample file (a mapping of dataset name to a list of
    participant labels), builds the fmriprep call for every participant and
    writes all the calls, one per line, to ``--tasks-list-file``. The output
    directory is created if it does not exist yet.

    The executable is chosen as follows: ``--cmd-call`` if given; otherwise
    the latest ``poldracklab_fmriprep_*`` image found in the folder pointed
    to by the ``SINGULARITY_BIN`` environment variable, run through
    ``singularity run``; otherwise plain ``fmriprep``.
    """
    import errno
    import yaml

    opts = get_parser().parse_args()

    with open(opts.sample_file) as sfh:
        # safe_load: the sample file only holds plain mappings/lists/strings,
        # and calling yaml.load without an explicit Loader is unsafe.
        sampledict = yaml.safe_load(sfh) or {}

    # Collect optional flags separately and join them with single spaces, so
    # that consecutive flags do not get glued together.
    extra_args = []
    if opts.anat_only:
        extra_args.append('--anat-only')
    if opts.nthreads:
        extra_args.append('--nthreads %d' % opts.nthreads)
    if opts.omp_nthreads:
        extra_args.append('--omp-nthreads %d' % opts.omp_nthreads)
    if opts.mem_gb:
        extra_args.append('--mem_mb %d' % (opts.mem_gb * 1000))
    if opts.tasks_filter:
        extra_args.append('-t %s' % ' '.join(opts.tasks_filter))
    cmdline = ' '.join([CMDLINE.strip()] + extra_args)

    fmriprep_cmd = 'fmriprep'
    if opts.cmd_call is not None:
        # Honor the command explicitly requested by the user
        fmriprep_cmd = opts.cmd_call
    else:
        # SINGULARITY_BIN may be unset; only look for images when it is defined
        singularity_dir = os.getenv('SINGULARITY_BIN')
        if singularity_dir:
            singularity_img = sorted(
                glob.glob(os.path.join(singularity_dir, 'poldracklab_fmriprep_*')))
            if singularity_img:
                # Last element after sorting, i.e. presumably the newest version
                fmriprep_cmd = 'singularity run %s' % singularity_img[-1]

    # Try to make this Python 2 compatible (no ``exist_ok`` argument there)
    try:
        os.makedirs(opts.output_dir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    task_cmds = []
    for dset, sublist in sampledict.items():
        for sub in sublist:
            task_cmds.append(cmdline.format(
                fmriprep_cmd=fmriprep_cmd,
                bids_dir=opts.openfmri_dir,
                dataset_dir=dset,
                output_dir=opts.output_dir,
                participant_label=sub,
            ))

    with open(opts.tasks_list_file, 'w') as tlfile:
        tlfile.write('\n'.join(task_cmds))


if __name__ == '__main__':
    main()
  
    Sign up for free
    to join this conversation on GitHub.
    Already have an account?
    Sign in to comment