oesteban · March 23, 2020 02:47
diff --git a/sample_openfmri.py b/sample_openfmri.py
 # -*- coding: utf-8 -*-
 # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
 # vi: set ft=python sts=4 ts=4 sw=4 et:
 """
 A tool to sample OpenfMRI datasets using a datalad installation

 Please run this first: ::

    # install openfmri dataset
    datalad install -r ///openfmri

    # get all sidecar files
    datalad get -J8 $( find openfmri/ -name "*_T1w.json" )
    datalad get -J8 $( find openfmri/ -name "*_T2w.json" )
    datalad get -J8 $( find openfmri/ -name "*_bold.json" )
    datalad get -J8 $( find openfmri/ -name "*_magnitude*.json" )
    datalad get -J8 $( find openfmri/ -name "*_phase*.json" )
    datalad get -J8 $( find openfmri/ -name "*_fieldmap.json" )

    # list subjects
    cd openfmri


 """

 import os
 import glob
 import numpy as np


 def get_parser():
    """
    Build the command-line parser of the OpenfMRI sampler.

    The parser takes one positional argument (``openfmri_dir``) and the optional
    flags ``-D/--datalad_fetch``, ``-o/--output-file``, ``-n/--num-participants``
    (default: 4), ``--njobs`` and ``--seed`` (default: 20170914).

    Returns the configured ``argparse.ArgumentParser``.
    """
    from argparse import ArgumentParser
    from argparse import RawTextHelpFormatter

    parser = ArgumentParser(
        description='OpenfMRI participants sampler, for FMRIPREP\'s testing purposes',
        formatter_class=RawTextHelpFormatter)

    parser.add_argument('openfmri_dir', action='store',
                        help='the root folder of the openfmri dataset')

    # optional arguments
    parser.add_argument('-D', '--datalad_fetch', action='store_true', default=False,
                        help='download sampled subjects')
    parser.add_argument('-o', '--output-file', action='store', help='write output file')
    parser.add_argument('-n', '--num-participants', action='store', type=int, default=4,
                        help='number of participants randomly selected per dataset')
    parser.add_argument('--njobs', action='store', type=int, help='parallel downloads')
    parser.add_argument('--seed', action='store', type=int, default=20170914,
                        help='seed for random number generation')

    return parser


 def main():
    """
    Entry point: sample participants from every OpenfMRI dataset.

    Scans ``<openfmri_dir>/ds*/sub-*`` for subjects that have both an ``anat``
    and a ``func`` folder (either directly, or under ``ses-*`` folders, in which
    case the dataset is counted as multi-session). Up to ``--num-participants``
    subjects are then drawn at random from each dataset, the selection is
    optionally written to a YAML file (``--output-file``) and the selected
    subjects are optionally fetched with datalad (``--datalad_fetch``).
    """
    opts = get_parser().parse_args()
    np.random.seed(opts.seed)

    out_file = None
    if opts.output_file is not None:
        # Resolve now: a relative path must refer to the caller's directory,
        # not to the dataset root we are about to change into.
        out_file = os.path.abspath(opts.output_file)

    datasets = {}
    multises = set()
    thispath = os.getcwd()
    os.chdir(opts.openfmri_dir)
    try:
        for subj in sorted(glob.glob('ds*/sub-*')):
            # os.path.split() honors whatever separator glob emits on this platform
            ds, sub = os.path.split(subj)
            if (os.path.isdir(os.path.join(subj, 'anat')) and
                    os.path.isdir(os.path.join(subj, 'func'))):
                datasets.setdefault(ds, []).append(sub)
            elif (glob.glob(os.path.join(subj, 'ses-*', 'anat')) and
                  glob.glob(os.path.join(subj, 'ses-*', 'func'))):
                multises.add(ds)
                datasets.setdefault(ds, []).append(sub)
    finally:
        # Restore the working directory even if scanning fails
        os.chdir(thispath)

    subsample = {}
    for ds, sublist in datasets.items():
        if len(sublist) <= opts.num_participants:
            # Not enough subjects to draw from: keep them all
            subsample[ds] = sublist
        else:
            subsample[ds] = sorted(np.random.choice(
                sublist, size=opts.num_participants, replace=False).tolist())

    # Count the participants actually selected (not all the available ones)
    n_sample = sum(len(sublist) for sublist in subsample.values())

    if out_file is not None:
        import yaml
        with open(out_file, 'w') as outfh:
            outfh.write(yaml.dump(subsample))
        print('Sampled participants stored to %s' % out_file)

    singleses = set(datasets.keys()) - multises
    print('Sampled %d participants' % n_sample)
    print('Datasets summary:\n\tSingle-session=%d'
          '\n\tMulti-session=%d'
          '\n\tTotal participants=%d' % (len(singleses), len(multises), n_sample))

    if opts.datalad_fetch:
        import datalad.api as dlad
        for ds, sublist in subsample.items():
            for sub in sublist:
                dlad.get(path=os.path.join(opts.openfmri_dir, ds, sub),
                         recursive=True, jobs=opts.njobs, verbose=True)


 # Run the command-line interface only when executed as a script, not on import
 if __name__ == '__main__':
    main()
diff --git a/sample_openfmri_tasks_list.py b/sample_openfmri_tasks_list.py
 # -*- coding: utf-8 -*-
 # emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
 # vi: set ft=python sts=4 ts=4 sw=4 et:
 """
 A tool to generate a tasks_list.sh file for running fmriprep
 on subjects downloaded with datalad with sample_openfmri.py


 """

 import os
 import glob

 # Template of the fmriprep call generated for each participant. The trailing
 # backslashes are line continuations inside the string literal, so the template
 # is a single line of text. main() fills the placeholders with str.format():
 # fmriprep_cmd, bids_dir, dataset_dir, output_dir and participant_label.
 # NOTE(review): the -w working directory is "{dataset_dir}/work", which looks
 # relative to wherever the generated commands are executed -- confirm intended.
 CMDLINE = """\
 {fmriprep_cmd} {bids_dir}/{dataset_dir} {output_dir}/{dataset_dir} participant \
 -w {dataset_dir}/work --participant_label {participant_label} \
 """


 def get_parser():
    """
    Build the command-line parser of the tasks-list generator.

    The parser takes three positional arguments (``openfmri_dir``,
    ``output_dir`` and ``sample_file``) plus optional flags that are forwarded
    to the generated fmriprep commands (``--anat-only``, ``--nthreads``,
    ``--omp_nthreads``, ``--mem-gb``, ``-t/--tasks-filter``), the name of the
    file to write (``--tasks-list-file``, default: ``tasks_list.sh``) and the
    command to call (``--cmd-call``).

    Returns the configured ``argparse.ArgumentParser``.
    """
    from argparse import ArgumentParser
    from argparse import RawTextHelpFormatter

    parser = ArgumentParser(
        description='Generates a tasks list to run FMRIPREP on participants '
                    'sampled from OpenfMRI',
        formatter_class=RawTextHelpFormatter)

    parser.add_argument('openfmri_dir', action='store',
                        help='the root folder of the openfmri dataset')

    parser.add_argument('output_dir', action='store',
                        help='the directory where outputs should be stored')

    parser.add_argument('sample_file', action='store',
                        help='a YAML file containing the subsample schedule')

    # optional arguments
    parser.add_argument('--anat-only', action='store_true', default=False,
                        help='run only anatomical workflow')
    parser.add_argument('--nthreads', action='store', type=int,
                        help='number of total threads')
    parser.add_argument('--omp_nthreads', action='store', type=int,
                        help='number of threads for OMP-based interfaces')
    parser.add_argument('--mem-gb', action='store', type=int,
                        help='available memory in GB')
    parser.add_argument('--tasks-list-file', default='tasks_list.sh',
                        action='store', help='write output file')
    parser.add_argument('-t', '--tasks-filter', action='store', nargs='*',
                        help='run only specific tasks')
    parser.add_argument('--cmd-call', action='store', help='command to be run')
    return parser


 def main():
    """
    Entry point: write one fmriprep command line per sampled participant.

    Reads the ``{dataset: [participants]}`` schedule from the YAML sample file,
    renders ``CMDLINE`` for every participant, appends the optional fmriprep
    flags requested on the command line and writes the resulting commands, one
    per line, to ``--tasks-list-file``. The output directory is created if it
    does not exist.
    """
    import errno
    import yaml
    opts = get_parser().parse_args()

    with open(opts.sample_file) as sfh:
        # The schedule is plain data, so the safe loader is enough; calling
        # yaml.load() without a Loader is rejected by recent PyYAML releases.
        # An empty file loads as None, which is treated as an empty schedule.
        sampledict = yaml.safe_load(sfh) or {}

    # Optional flags are collected separately and joined with single spaces so
    # that consecutive options never run into each other.
    extra_args = []
    if opts.anat_only:
        extra_args.append('--anat-only')

    if opts.nthreads:
        extra_args.append('--nthreads %d' % opts.nthreads)

    if opts.omp_nthreads:
        extra_args.append('--omp-nthreads %d' % opts.omp_nthreads)

    if opts.mem_gb:
        extra_args.append('--mem_mb %d' % (opts.mem_gb * 1000))

    if opts.tasks_filter:
        extra_args.append('-t %s' % ' '.join(opts.tasks_filter))

    fmriprep_cmd = 'fmriprep'
    if opts.cmd_call is not None:
        # An explicit --cmd-call takes precedence over any auto-detection
        fmriprep_cmd = opts.cmd_call
    else:
        # Fall back to the latest singularity image, if a folder is advertised
        singularity_dir = os.getenv('SINGULARITY_BIN')
        if singularity_dir:
            singularity_img = sorted(
                glob.glob(os.path.join(singularity_dir, 'poldracklab_fmriprep_*')))
            if singularity_img:
                fmriprep_cmd = 'singularity run %s' % singularity_img[-1]

    # Try to make this Python 2 compatible (no ``exist_ok`` argument there)
    try:
        os.makedirs(opts.output_dir)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise

    # Drop the template's surrounding whitespace; separators are added on join
    template = CMDLINE.strip()
    task_cmds = []
    for dset, sublist in sampledict.items():
        for sub in sublist:
            cmd = template.format(
                fmriprep_cmd=fmriprep_cmd,
                bids_dir=opts.openfmri_dir,
                dataset_dir=dset,
                output_dir=opts.output_dir,
                participant_label=sub,
            )
            task_cmds.append(' '.join([cmd] + extra_args))

    with open(opts.tasks_list_file, 'w') as tlfile:
        # Terminate every command (including the last one) with a newline so
        # line-oriented shell tools see all of them.
        tlfile.write(''.join('%s\n' % cmd for cmd in task_cmds))


 # Run the command-line interface only when executed as a script, not on import
 if __name__ == '__main__':
    main()
	# -*- coding: utf-8 -*-
	# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
	# vi: set ft=python sts=4 ts=4 sw=4 et:
	"""
	A tool to sample OpenfMRI datasets using a datalad installation

	Please run this first: ::

	# install openfmri dataset
	datalad install -r ///openfmri

	# get all sidecar files
	datalad get -J8 $( find openfmri/ -name "*_T1w.json" )
	datalad get -J8 $( find openfmri/ -name "*_T2w.json" )
	datalad get -J8 $( find openfmri/ -name "*_bold.json" )
	datalad get -J8 $( find openfmri/ -name "*_magnitude*.json" )
	datalad get -J8 $( find openfmri/ -name "*_phase*.json" )
	datalad get -J8 $( find openfmri/ -name "*_fieldmap.json" )

	# list subjects
	cd openfmri


	"""

	import os
	import glob
	import numpy as np


	def get_parser():
	    """
	    Build the command-line parser of the OpenfMRI sampler.

	    The parser takes one positional argument (``openfmri_dir``) and the optional
	    flags ``-D/--datalad_fetch``, ``-o/--output-file``, ``-n/--num-participants``
	    (default: 4), ``--njobs`` and ``--seed`` (default: 20170914).

	    Returns the configured ``argparse.ArgumentParser``.
	    """
	    from argparse import ArgumentParser
	    from argparse import RawTextHelpFormatter

	    parser = ArgumentParser(
	        description='OpenfMRI participants sampler, for FMRIPREP\'s testing purposes',
	        formatter_class=RawTextHelpFormatter)

	    parser.add_argument('openfmri_dir', action='store',
	                        help='the root folder of the openfmri dataset')

	    # optional arguments
	    parser.add_argument('-D', '--datalad_fetch', action='store_true', default=False,
	                        help='download sampled subjects')
	    parser.add_argument('-o', '--output-file', action='store', help='write output file')
	    parser.add_argument('-n', '--num-participants', action='store', type=int, default=4,
	                        help='number of participants randomly selected per dataset')
	    parser.add_argument('--njobs', action='store', type=int, help='parallel downloads')
	    parser.add_argument('--seed', action='store', type=int, default=20170914,
	                        help='seed for random number generation')

	    return parser


	def main():
	    """
	    Entry point: sample participants from every OpenfMRI dataset.

	    Scans ``<openfmri_dir>/ds*/sub-*`` for subjects that have both an ``anat``
	    and a ``func`` folder (either directly, or under ``ses-*`` folders, in which
	    case the dataset is counted as multi-session). Up to ``--num-participants``
	    subjects are then drawn at random from each dataset, the selection is
	    optionally written to a YAML file (``--output-file``) and the selected
	    subjects are optionally fetched with datalad (``--datalad_fetch``).
	    """
	    opts = get_parser().parse_args()
	    np.random.seed(opts.seed)

	    out_file = None
	    if opts.output_file is not None:
	        # Resolve now: a relative path must refer to the caller's directory,
	        # not to the dataset root we are about to change into.
	        out_file = os.path.abspath(opts.output_file)

	    datasets = {}
	    multises = set()
	    thispath = os.getcwd()
	    os.chdir(opts.openfmri_dir)
	    try:
	        # Subjects live one level below each dataset folder: ds*/sub-*
	        for subj in sorted(glob.glob('ds*/sub-*')):
	            # os.path.split() honors whatever separator glob emits on this platform
	            ds, sub = os.path.split(subj)
	            if (os.path.isdir(os.path.join(subj, 'anat')) and
	                    os.path.isdir(os.path.join(subj, 'func'))):
	                datasets.setdefault(ds, []).append(sub)
	            elif (glob.glob(os.path.join(subj, 'ses-*', 'anat')) and
	                  glob.glob(os.path.join(subj, 'ses-*', 'func'))):
	                multises.add(ds)
	                datasets.setdefault(ds, []).append(sub)
	    finally:
	        # Restore the working directory even if scanning fails
	        os.chdir(thispath)

	    subsample = {}
	    for ds, sublist in datasets.items():
	        if len(sublist) <= opts.num_participants:
	            # Not enough subjects to draw from: keep them all
	            subsample[ds] = sublist
	        else:
	            subsample[ds] = sorted(np.random.choice(
	                sublist, size=opts.num_participants, replace=False).tolist())

	    # Count the participants actually selected (not all the available ones)
	    n_sample = sum(len(sublist) for sublist in subsample.values())

	    if out_file is not None:
	        import yaml
	        with open(out_file, 'w') as outfh:
	            outfh.write(yaml.dump(subsample))
	        print('Sampled participants stored to %s' % out_file)

	    singleses = set(datasets.keys()) - multises
	    print('Sampled %d participants' % n_sample)
	    print('Datasets summary:\n\tSingle-session=%d'
	          '\n\tMulti-session=%d'
	          '\n\tTotal participants=%d' % (len(singleses), len(multises), n_sample))

	    if opts.datalad_fetch:
	        import datalad.api as dlad
	        for ds, sublist in subsample.items():
	            for sub in sublist:
	                dlad.get(path=os.path.join(opts.openfmri_dir, ds, sub),
	                         recursive=True, jobs=opts.njobs, verbose=True)


	# Run the command-line interface only when executed as a script, not on import
	if __name__ == '__main__':
	    main()