Last active
January 14, 2019 22:55
-
-
Save jayrbolton/38febac7c21cf29ff8e32a725f800ac7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Define a Docker workflow (independent of KBase):
# - input and output files
# - minimum node requirements for a job
# - whether to exit on the first failure or continue past failures
# - pass-through of environment variables
# - execution backend (HTCondor, etc.)
# - serial vs. concurrent execution, figured out automatically from each task's inputs and outputs
def subsample():
    """Build the read-subsampling task.

    Returns a ``Task`` on the ``jgi/subsample`` image that takes
    ``reads.fastq`` as its input file, requires a node with at least 16GB
    of memory, and invokes ``reformat.sh`` with the arguments listed below
    (including ``samplerate=0.1`` and ``out=subsample.fastq.gz``).
    """
    reformat_args = [
        'reformat.sh',
        'in=reads.fastq',
        'out=subsample.fastq.gz',
        'bhist=bhist.txt',
        'gchist=gchist.txt',
        'bqhist=bqhist.txt',
        'obqhist=obqhist.txt',
        'samplerate=0.1',
        'qin=33',
        'ow=t',
        'gcplot=t',
        'gcbins=auto',
    ]
    return Task(
        image='jgi/subsample',
        input_files=['reads.fastq'],
        node_requirements={'min_memory': '16GB'},
        command=reformat_args,
    )
def mer_sampling(env=None):
    """Build the k-mer uniqueness sampling task.

    Args:
        env: Not read by this builder. It is optional so the function can be
            called both as ``mer_sampling()`` and ``mer_sampling(env)``;
            callers attach the environment to the returned task themselves
            via ``.set_env(env)``.

    Returns:
        A ``Task`` on the ``jgi/mer_sampling`` image that takes
        ``reads.fastq`` as its input file, requires at least 2 cores and 8GB
        of memory, and invokes ``bbcountunique.sh`` with ``k=25``.
    """
    return Task(
        image='jgi/mer_sampling',
        input_files=['reads.fastq'],
        node_requirements={
            'min_cores': 2,
            'min_memory': '8GB',
        },
        command=[
            'bbcountunique.sh',
            'in=reads.fastq',
            'out=merSampler.m25.e25000_2',
            'k=25',
            'percent=t',
            'count=t',
            'cumulative=f',
            'ow=t',
        ],
    )
def dedupe():
    """Build the task registered under the 'dedupe' step.

    Returns a ``Task`` on the ``jgi/subsample`` image that takes an input
    file named ``subsample`` and invokes ``reformat.sh`` with
    ``out=subsample.fastqc.gz``.
    """
    # NOTE(review): apart from in=/out=, this image and argument list are the
    # same as the subsample task's (including samplerate=0.1) -- confirm the
    # intended deduplication command.
    reformat_args = [
        'reformat.sh',
        'in=subsample',
        'out=subsample.fastqc.gz',
        'bhist=bhist.txt',
        'gchist=gchist.txt',
        'bqhist=bqhist.txt',
        'obqhist=obqhist.txt',
        'samplerate=0.1',
        'qin=33',
        'ow=t',
        'gcplot=t',
        'gcbins=auto',
    ]
    return Task(
        image='jgi/subsample',
        input_files=['subsample'],
        command=reformat_args,
    )
def sketch(db):
    """Build a sketch-comparison task against one reference database.

    Args:
        db: Database name (a string). It is used to name the output file
            ``sketch_vs_<db>.txt`` and is also passed as the final
            ``sendsketch.sh`` argument.

    Returns:
        A ``Task`` on the ``jgi/sketch`` image that takes an input file
        named ``infile``.
    """
    out_arg = 'out=sketch_vs_' + db + '.txt'
    sendsketch_args = [
        'sendsketch.sh',
        'in=infile',
        out_arg,
        'ow=t',
        'colors=f',
        'printtaxa=t',
        'depth',
        'depth2',
        'unique2',
        'merge',
        db,
    ]
    return Task(
        image='jgi/sketch',
        input_files=['infile'],
        command=sendsketch_args,
    )
def jgi_read_qc(fastq_path, env):
    """Assemble the full read-QC workflow for one FASTQ file.

    Args:
        fastq_path: Path bound to the reads input of the subsample,
            mer-sampling and sketch tasks.
        env: Environment handed to every task through ``set_env``.

    Returns:
        A ``Tasks`` collection with the keys ``'subsample'``, ``'dedupe'``,
        ``'mer_sampling'`` and ``'sketch_tasks'``; the last one is a dict
        holding one sketch task per database, keyed ``'sketch_task_<db>'``.
    """
    sketch_dbs = ["nt", "refseq", "silva"]

    # Multi-line method chains are parenthesized: a continuation line that
    # begins with a bare ``.`` is a syntax error otherwise.
    subsample_task = (
        subsample()
        .inputs({'reads.fastq': fastq_path})
        .set_env(env)
    )
    # The requested file name must match the ``out=`` argument in
    # subsample()'s command (``subsample.fastq.gz``).
    subsample_outfile = subsample_task.outfile('subsample.fastq.gz')

    # mer_sampling declares an ``env`` parameter, so it is passed explicitly.
    mer_sampling_task = (
        mer_sampling(env)
        .inputs({'reads.fastq': fastq_path})
        .set_env(env)
    )

    # Inputs are given as a dict, like every other ``.inputs`` call here.
    dedupe_task = (
        dedupe()
        .inputs({'subsample': subsample_outfile})
        .set_env(env)
    )

    sketch_tasks = {
        'sketch_task_' + db: sketch(db).inputs({'infile': fastq_path}).set_env(env)
        for db in sketch_dbs
    }  # type: dict

    return Tasks({
        'subsample': subsample_task,
        'dedupe': dedupe_task,
        'mer_sampling': mer_sampling_task,
        'sketch_tasks': sketch_tasks,
    })
# Interfacing with KBase:
# - define KBase fields and forms using Python
# - take values from the form and pass them to the task
# - take output from the task and create KBase objects and a final report
def narrative_form(params):
    """Build the form that can be placed in a KBase narrative cell.

    ``params`` is accepted but not read. The returned ``KBaseForm`` holds a
    single text field typed ``KBaseAssembly.PairedEndLibrary`` and labelled
    'Reads Input'.
    """
    reads_field_spec = {
        'type': 'KBaseAssembly.PairedEndLibrary',
        'label': 'Reads Input',
        'description': 'This is a description',
    }
    reads_field = KBaseForm.text_field(reads_field_spec)
    return KBaseForm([reads_field])
def run_task(form_data, task, env):
    """Create the jgi_read_qc workflow from narrative-cell form data.

    Reads ``form_data['reads']['fastq_path']`` and forwards it, together
    with ``env``, to ``jgi_read_qc``. The ``task`` argument is not used.
    """
    # NOTE(review): the reads object is expected to already be available as a
    # FASTQ file at this path; no download is performed in this function --
    # confirm where that happens.
    reads = form_data['reads']
    return jgi_read_qc(reads['fastq_path'], env)
def task_output_to_kbase(results, form_data):
    """Turn the read-QC task results into a KBase report.

    Only ``results['subsample']['bhist.txt']`` is read; ``form_data`` is
    accepted but not used. Returns the constructed ``KBaseReport``.
    """
    bhist_entry = {
        'title': 'Base Frequency Histogram',
        'data': results['subsample']['bhist.txt'],
    }
    report_files = {
        'bhist.txt': bhist_entry,
        # etc..
    }
    return KBaseReport(
        html='path/to/html/directory',
        files=report_files,
        objects={
            # .. kbase object references
        },
    )
# convert some output |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment