Skip to content

Instantly share code, notes, and snippets.

@oesteban
Created March 23, 2020 02:47
Show Gist options
  • Save oesteban/51e0302292d9a1d3090a2807c100a698 to your computer and use it in GitHub Desktop.
Save oesteban/51e0302292d9a1d3090a2807c100a698 to your computer and use it in GitHub Desktop.
Sample OpenfMRI (scraps from *fMRIPrep*)
# -*- coding: utf-8 -*-
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
"""
A tool to sample OpenfMRI datasets using a datalad installation.

Please run this first::

    # install openfmri dataset
    datalad install -r ///openfmri
    # get all sidecar files
    datalad get -J8 $( find openfmri/ -name "*_T1w.json" )
    datalad get -J8 $( find openfmri/ -name "*_T2w.json" )
    datalad get -J8 $( find openfmri/ -name "*_bold.json" )
    datalad get -J8 $( find openfmri/ -name "*_magnitude*.json" )
    datalad get -J8 $( find openfmri/ -name "*_phase*.json" )
    datalad get -J8 $( find openfmri/ -name "*_fieldmap.json" )
    # list subjects
    cd openfmri

"""
import os
import glob
import numpy as np
def get_parser():
    """Build the command-line parser of the OpenfMRI participants sampler.

    Returns
    -------
    argparse.ArgumentParser
        Parser with one positional argument (``openfmri_dir``) and the
        options ``-D/--datalad_fetch``, ``-o/--output-file``,
        ``-n/--num-participants`` (default 4), ``--njobs`` and ``--seed``
        (default 20170914).

    """
    from argparse import ArgumentParser
    from argparse import RawTextHelpFormatter

    parser = ArgumentParser(
        description='OpenfMRI participants sampler, for FMRIPREP\'s testing purposes',
        formatter_class=RawTextHelpFormatter)

    parser.add_argument('openfmri_dir', action='store',
                        help='the root folder of the openfmri dataset')

    # optional arguments
    parser.add_argument('-D', '--datalad_fetch', action='store_true', default=False,
                        help='download sampled subjects')
    parser.add_argument('-o', '--output-file', action='store', help='write output file')
    parser.add_argument('-n', '--num-participants', action='store', type=int, default=4,
                        help='number of participants randomly selected per dataset')
    parser.add_argument('--njobs', action='store', type=int, help='parallel downloads')
    parser.add_argument('--seed', action='store', type=int, default=20170914,
                        help='seed for random number generation')
    return parser
def _find_datasets(openfmri_dir):
    """Map every dataset under *openfmri_dir* to its usable participants.

    A participant is kept when it has both ``anat`` and ``func`` folders,
    either directly or inside ``ses-*`` folders. Returns a tuple
    ``(datasets, multises)``: a dict of dataset name -> list of participant
    folder names, and the set of dataset names in which at least one
    participant was matched through ``ses-*`` folders.
    """
    datasets = {}
    multises = set()

    thispath = os.getcwd()
    os.chdir(openfmri_dir)
    try:
        # Sorting fixes the traversal order, and hence the order in which the
        # random generator is consumed later on.
        for subj in sorted(glob.glob('ds*/sub-*')):
            # The pattern has exactly two levels, so the parent is the dataset
            ds = os.path.dirname(subj)
            anat_dir = os.path.join(subj, 'anat')
            func_dir = os.path.join(subj, 'func')
            if os.path.isdir(anat_dir) and os.path.isdir(func_dir):
                datasets.setdefault(ds, []).append(os.path.basename(subj))
            elif (glob.glob(os.path.join(subj, 'ses-*', 'anat')) and
                  glob.glob(os.path.join(subj, 'ses-*', 'func'))):
                multises.add(ds)
                datasets.setdefault(ds, []).append(os.path.basename(subj))
    finally:
        # Always restore the caller's working directory, even on errors
        os.chdir(thispath)
    return datasets, multises


def _sample_participants(datasets, num_participants):
    """Draw up to *num_participants* per dataset from NumPy's global RNG."""
    subsample = {}
    for ds, sublist in datasets.items():
        if len(sublist) <= num_participants:
            subsample[ds] = sublist
        else:
            subsample[ds] = sorted(np.random.choice(
                sublist, size=num_participants, replace=False).tolist())
    return subsample


def main():
    """Entry point: sample participants per dataset and optionally fetch them.

    Parses the command line, seeds NumPy's global random state with
    ``--seed``, collects the ``ds*/sub-*`` folders of ``openfmri_dir`` that
    have both ``anat`` and ``func`` data, and keeps at most
    ``--num-participants`` of them per dataset. The selection is written as
    YAML when ``--output-file`` is given, a summary is printed, and the
    sampled folders are downloaded through ``datalad.api.get`` when
    ``--datalad_fetch`` is set.

    Raises
    ------
    RuntimeError
        If the number of sampled participants differs from the expected one.

    """
    opts = get_parser().parse_args()
    np.random.seed(opts.seed)

    out_file = None
    if opts.output_file is not None:
        out_file = os.path.abspath(opts.output_file)

    datasets, multises = _find_datasets(opts.openfmri_dir)
    subsample = _sample_participants(datasets, opts.num_participants)

    # Double check everything looks good: compare the expected count against
    # what was actually sampled (not against the full list of participants).
    n_sample = sum(min(opts.num_participants, len(sublist))
                   for sublist in datasets.values())
    n_selected = sum(len(sublist) for sublist in subsample.values())
    if n_sample != n_selected:
        raise RuntimeError('Expected %d sampled participants, but got %d'
                           % (n_sample, n_selected))

    if out_file is not None:
        import yaml
        with open(out_file, 'w') as outfh:
            outfh.write(yaml.dump(subsample))
        print('Sampled participants stored to %s' % out_file)

    singleses = set(datasets.keys()) - multises
    print('Sampled %d participants' % n_sample)
    print('Datasets summary:\n\tSingle-session=%d'
          '\n\tMulti-session=%d'
          '\n\tTotal participants=%d' % (len(singleses), len(multises), n_sample))

    if opts.datalad_fetch:
        import datalad.api as dlad
        for ds, sublist in subsample.items():
            for sub in sublist:
                dlad.get(path=os.path.join(opts.openfmri_dir, ds, sub),
                         recursive=True, jobs=opts.njobs, verbose=True)
# Run the sampler only when this file is executed as a script.
if __name__ == '__main__':
    main()
# -*- coding: utf-8 -*-
# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
"""
A tool to generate a tasks_list.sh file for running fmriprep
on subjects downloaded with datalad with sample_openfmri.py
"""
import os
import glob
# Template of one fmriprep call; the ``{...}`` placeholders are filled in with
# ``str.format``. Each backslash-newline is a line continuation inside the
# string literal, so the template is a single line ending with a space.
CMDLINE = """\
{fmriprep_cmd} {bids_dir}/{dataset_dir} {output_dir}/{dataset_dir} participant \
-w {dataset_dir}/work --participant_label {participant_label} \
"""
def get_parser():
    """Build the command-line parser of the fmriprep tasks-list generator.

    Returns
    -------
    argparse.ArgumentParser
        Parser with three positional arguments (``openfmri_dir``,
        ``output_dir``, ``sample_file``) and the options ``--anat-only``,
        ``--nthreads``, ``--omp_nthreads``, ``--mem-gb``,
        ``--tasks-list-file`` (default ``tasks_list.sh``),
        ``-t/--tasks-filter`` and ``--cmd-call``.

    """
    from argparse import ArgumentParser
    from argparse import RawTextHelpFormatter

    parser = ArgumentParser(
        description='Tasks list generator for running FMRIPREP on sampled '
                    'OpenfMRI participants',
        formatter_class=RawTextHelpFormatter)

    parser.add_argument('openfmri_dir', action='store',
                        help='the root folder of the openfmri dataset')
    parser.add_argument('output_dir', action='store',
                        help='the directory where outputs should be stored')
    parser.add_argument('sample_file', action='store',
                        help='a YAML file containing the subsample schedule')

    # optional arguments
    parser.add_argument('--anat-only', action='store_true', default=False,
                        help='run only anatomical workflow')
    parser.add_argument('--nthreads', action='store', type=int,
                        help='number of total threads')
    parser.add_argument('--omp_nthreads', action='store', type=int,
                        help='number of threads for OMP-based interfaces')
    parser.add_argument('--mem-gb', action='store', type=int,
                        help='available memory in GB')
    parser.add_argument('--tasks-list-file', default='tasks_list.sh',
                        action='store', help='write output file')
    parser.add_argument('-t', '--tasks-filter', action='store', nargs='*',
                        help='run only specific tasks')
    parser.add_argument('--cmd-call', action='store', help='command to be run')
    return parser
def main():
    """Entry point: write one fmriprep command line per sampled participant.

    Reads the YAML ``sample_file`` (a mapping of dataset name -> list of
    participants), fills ``CMDLINE`` for every participant, appends the
    optional flags requested on the command line, creates ``output_dir`` if
    needed, and writes the commands, one per line, to ``--tasks-list-file``.

    The command is ``--cmd-call`` when given. Otherwise the last (sorted)
    ``poldracklab_fmriprep_*`` file found in the folder named by the
    ``SINGULARITY_BIN`` environment variable is run with ``singularity run``,
    falling back to plain ``fmriprep``.

    Raises
    ------
    OSError
        If ``output_dir`` cannot be created for a reason other than it
        already existing, or if a file cannot be read/written.

    """
    import errno
    import yaml

    opts = get_parser().parse_args()

    with open(opts.sample_file) as sfh:
        # safe_load only builds plain Python objects; yaml.load() without an
        # explicit Loader is unsafe and rejected by recent PyYAML releases.
        # An empty file parses to None, which is treated as "no datasets".
        sampledict = yaml.safe_load(sfh) or {}

    # Collect optional flags separately so they are always space-delimited
    extra_args = []
    if opts.anat_only:
        extra_args.append('--anat-only')
    if opts.nthreads:
        extra_args.append('--nthreads %d' % opts.nthreads)
    if opts.omp_nthreads:
        extra_args.append('--omp-nthreads %d' % opts.omp_nthreads)
    if opts.mem_gb:
        extra_args.append('--mem_mb %d' % (opts.mem_gb * 1000))
    if opts.tasks_filter:
        extra_args.append('-t %s' % ' '.join(opts.tasks_filter))

    fmriprep_cmd = 'fmriprep'
    if opts.cmd_call is not None:
        # NOTE(review): --cmd-call was parsed but never used; honoring it here
        # is presumably the intended behavior - confirm.
        fmriprep_cmd = opts.cmd_call
    else:
        singularity_dir = os.getenv('SINGULARITY_BIN')
        # Without SINGULARITY_BIN there is nowhere to look for an image
        if singularity_dir:
            singularity_img = sorted(
                glob.glob(os.path.join(singularity_dir, 'poldracklab_fmriprep_*')))
            if singularity_img:
                fmriprep_cmd = 'singularity run %s' % singularity_img[-1]

    # Try to make this Python 2 compatible
    try:
        os.makedirs(opts.output_dir)
    except OSError as e:
        # os.errno is gone in Python >= 3.7; use the errno module instead
        if e.errno != errno.EEXIST:
            raise

    task_cmds = []
    for dset, sublist in sampledict.items():
        for sub in sublist:
            base_cmd = CMDLINE.format(
                fmriprep_cmd=fmriprep_cmd,
                bids_dir=opts.openfmri_dir,
                dataset_dir=dset,
                output_dir=opts.output_dir,
                participant_label=sub,
            )
            # Format the template first so that user-provided flag values are
            # never interpreted as format fields.
            task_cmds.append(' '.join([base_cmd.rstrip()] + extra_args))

    with open(opts.tasks_list_file, 'w') as tlfile:
        tlfile.write('\n'.join(task_cmds))
# Generate the tasks list only when this file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment