Created
February 6, 2019 23:09
-
-
Save jayrbolton/a24608438b2f936c728045cb05f48366 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A task is a Docker image, a command, and optional hardware preferences.
# A pipeline is a collection of sequential or parallel tasks with a shared volume mount.
# Pipelines are submitted to HTCondor.
# A small application layer would handle the job submission to HTCondor,
# pulling Docker images and running containers with the correct mounts and settings.
# Define HTCondor jobs: each Task pairs a Docker image with a command and,
# optionally, node requirements or preferences (`node_prefs`).
# NOTE(review): `command=[...]` is presumably a placeholder for the real
# argument list of each tool — fill in before use.
subsample = Task(
    image='jgi/subsample',
    node_prefs={'min_memory': '16GB'},
    command=[...],
)

mer_sampling = Task(
    image='jgi/mer_sampling',
    node_prefs={'min_cores': 2, 'min_memory': '8GB'},
    command=[...],
)

# No node_prefs are given for this task.
dedupe = Task(
    image='jgi/dedupe',
    command=[...],
)
# A parameterized task
def sketch(db):
    """Build the sketch Task for one reference database.

    Args:
        db: Database name; interpolated into the output file name
            ``sketch_vs_{db}.txt``.

    Returns:
        A Task using the ``jgi/sketch`` image whose command runs
        ``sendsketch.sh`` with ``in=infile`` and the per-database output.
    """
    return Task(
        image='jgi/sketch',
        command=['sendsketch.sh', 'in=infile', f'out=sketch_vs_{db}.txt'],
    )
# Define a full pipeline as a sequence of tasks, some of which can run in parallel.
# All tasks in a pipeline run with a shared directory mount and a shared volume mount in Docker.
sketch_dbs = ['nt', 'refseq', 'silva']
# One sketch task per reference database; these are grouped into the Parallel stage below.
sketch_tasks = [sketch(db) for db in sketch_dbs]
readqc = pipeline.Serial([
    subsample,
    dedupe,
    mer_sampling,
    pipeline.Parallel(sketch_tasks),
])

# Submit the pipeline to HTCondor.
# NOTE(review): `url` and `token` are not defined in the visible part of this
# file — they must be supplied before this line can run.
results = pipeline.submit(readqc, url, token)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment