Created
March 5, 2019 17:02
-
-
Save danielecook/e9d60d606e6739da7eef176efe954272 to your computer and use it in GitHub Desktop.
nextflow drops cache
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Benchmark grid: every item emitted by `design` is crossed with each
// core count, each memory size and each alignment method id below.
def benchCores   = [8, 12, 16]          // Cores
def benchMemory  = [12, 32, 64]         // Memory (GB)
def benchMethods = [1, 2, 3, 7, 8, 9]   // Method

align = design
    .combine(benchCores)
    .combine(benchMemory)
    .combine(benchMethods)
process alignment {
    /*
        Benchmark alignment of one paired FASTQ set with `bwa mem`, followed by
        BAM conversion, coordinate sorting and indexing.

        `method` selects one of nine pipeline variants:
          1    - the "old" method (samtools 0.1.19 + Picard SortSam/BuildBamIndex)
          2-9  - samtools + sambamba variants that differ in thread/memory
                 split and in whether intermediate files or pipes are used

        Inputs  : row, meta, the two FASTQ files, and the cores / memory (GB) /
                  method values taken from the `align` channel.
        Outputs : row, meta (extended with cores, memory and method), the sorted
                  BAM and its index, emitted into `align_out`.
    */

    cache 'deep'
    tag { gen_tag("alignment", row, meta) }
    module 'BWA/0.7.15-intel-2016b:picard/1.107-Java-1.8.0_92:SAMtools/1.3.1-foss-2016b:Sambamba/0.6.6'
    memory "${memory} GB"
    cpus "${cores}"
    publishDir "output/bam/benchmark"
    errorStrategy 'finish'

    when:
        // Only run when there are more than 0.187 cores per GB of memory.
        // With the values visible in this file (8/12/16 cores, 12/32/64 GB)
        // this skips only the 8-core / 64 GB combination (8 / 64 = 0.125).
        cores / memory > 0.187

    input:
        set val(row), val(meta), file(fq1), file(fq2), val(cores), val(memory), val(method) from align

    output:
        set val(row), val(meta), file("${slug}.bam"), file("${slug}.bam.bai") into align_out

    script:
        // Construct read group. Each tag appears exactly once: the SAM
        // specification does not allow a repeated tag within one @RG line.
        // Declared with `def` so the value is local to this task.
        def readGroup = ["@RG",
                         "ID:${row.sample_name}.${row.seq_sample_id}_${row.run_dir}_${row.lane}",
                         "SM:${row.sample_name}",
                         "LB:${row.seq_sample_id}",
                         "PU:${row.flowcell}.${row.lane}",
                         "PL:illumina"].join("\\t")

        // Work on a copy so the map received from the channel is not mutated.
        // `meta` and `slug` are intentionally assigned without `def`: the
        // output: block above refers to both of them.
        meta = new HashMap(meta)
        meta['cores'] = cores
        meta['memory'] = memory
        meta['method'] = method

        def picardParams = "MAX_RECORDS_IN_RAM=2000000 VALIDATION_STRINGENCY=STRICT"
        slug = p(row, meta)

        if (method == 1)
            """
            # 'Old' method
            # Uses the old samtools
            module load SAMtools/0.1.19-foss-2016b
            bwa mem -t ${task.cpus} \\
                -R "${readGroup}" \\
                ${params.genome} ${fq1} ${fq2} | \\
            awk 'length(\$10) > 35 || \$1 ~ /^@/' - | \\
            samtools view -@ ${task.cpus} -bhS - > out.unsorted.bam
            java -d64 -Xmx8g -jar \${ROOTPICARD}/SortSam.jar INPUT=out.unsorted.bam OUTPUT=${slug}.bam SORT_ORDER=coordinate ${picardParams}
            java -d64 -Xmx8g -jar \${ROOTPICARD}/BuildBamIndex.jar INPUT=${slug}.bam OUTPUT=${slug}.bam.bai ${picardParams}
            """
        else if (method == 2)
            """
            # Samtools+sambamba with intermediate file
            bwa mem -t ${task.cpus} \\
                -R "${readGroup}" \\
                ${params.genome} ${fq1} ${fq2} | \\
            awk 'length(\$10) > ${params.min_read_length} || \$1 ~ /^@/' | \\
            samtools view -bh --threads ${task.cpus} - > out.tmp.bam
            # Sort alignment
            sambamba sort -m "${task.memory.toGiga()}GB" \\
                --nthreads=${task.cpus*params.threads_per_core} \\
                --tmpdir=. \\
                --out=${slug}.bam \\
                out.tmp.bam
            sambamba index --nthreads=${task.cpus*params.threads_per_core} ${slug}.bam
            rm out.tmp.bam
            """
        else if (method == 3)
            // intdiv keeps the thread count a whole number for odd cpu counts.
            """
            # Samtools+sambamba with intermediate file and balanced threads
            bwa mem -t ${task.cpus.intdiv(2)} \\
                -R "${readGroup}" \\
                ${params.genome} ${fq1} ${fq2} | \\
            awk 'length(\$10) > ${params.min_read_length} || \$1 ~ /^@/' | \\
            samtools view -bh --threads ${task.cpus.intdiv(2)} - > out.tmp.bam
            # Sort alignment
            sambamba sort -m "${task.memory.toGiga()}GB" \\
                --nthreads=${task.cpus} \\
                --tmpdir=. \\
                --out=${slug}.bam \\
                out.tmp.bam
            sambamba index --nthreads=${task.cpus} ${slug}.bam
            rm out.tmp.bam
            """
        else if (method == 4)
            """
            # Samtools+sambamba with pipe unbalanced
            bwa mem -t ${task.cpus} \\
                -R "${readGroup}" \\
                ${params.genome} ${fq1} ${fq2} | \\
            awk 'length(\$10) > ${params.min_read_length} || \$1 ~ /^@/' | \\
            samtools view -bh --threads ${task.cpus} - | \\
            sambamba sort -m "${task.memory.toGiga()-2}GB" \\
                --nthreads=${task.cpus} \\
                --tmpdir=. \\
                --out=${slug}.bam \\
                /dev/stdin
            sambamba index --nthreads=${task.cpus} ${slug}.bam
            """
        else if (method == 5)
            """
            # Samtools+sambamba with pipe and balanced threads
            bwa mem -t ${task.cpus.intdiv(2)} \\
                -R "${readGroup}" \\
                ${params.genome} ${fq1} ${fq2} | \\
            awk 'length(\$10) > ${params.min_read_length} || \$1 ~ /^@/' | \\
            samtools view -bh --threads ${task.cpus.intdiv(4)} - | \\
            sambamba sort -m "${task.memory.toGiga()-2}GB" \\
                --nthreads=${task.cpus.intdiv(4)} \\
                --tmpdir=. \\
                --out=${slug}.bam \\
                /dev/stdin
            sambamba index --nthreads=${task.cpus} ${slug}.bam
            """
        else if (method == 6)
            """
            # Samtools+sambamba with pipe, balanced threads and balanced memory
            bwa mem -t ${task.cpus.intdiv(2)} \\
                -R "${readGroup}" \\
                ${params.genome} ${fq1} ${fq2} | \\
            awk 'length(\$10) > ${params.min_read_length} || \$1 ~ /^@/' | \\
            samtools view -bh --threads ${task.cpus.intdiv(4)} - | \\
            sambamba sort -m "${task.memory.toGiga().intdiv(2)-2}GB" \\
                --nthreads=${task.cpus.intdiv(4)} \\
                --tmpdir=. \\
                --out=${slug}.bam \\
                /dev/stdin
            sambamba index --nthreads=${task.cpus} ${slug}.bam
            """
        else if (method == 7)
            """
            # Samtools+sambamba with intermediate file; No threads on samtools out
            bwa mem -t ${task.cpus} \\
                -R "${readGroup}" \\
                ${params.genome} ${fq1} ${fq2} | \\
            awk 'length(\$10) > ${params.min_read_length} || \$1 ~ /^@/' | \\
            samtools view -bh - > out.tmp.bam
            # Sort alignment
            sambamba sort -m "${task.memory.toGiga()}GB" \\
                --nthreads=${task.cpus*params.threads_per_core} \\
                --tmpdir=. \\
                --out=${slug}.bam \\
                out.tmp.bam
            sambamba index --nthreads=${task.cpus*params.threads_per_core} ${slug}.bam
            rm out.tmp.bam
            """
        else if (method == 8)
            """
            # BWA Intermediate; Samtools intermediate;Samtools+sambamba with intermediate file; No threads on samtools out
            bwa mem -t ${task.cpus} \\
                -R "${readGroup}" \\
                ${params.genome} ${fq1} ${fq2} | \\
            awk 'length(\$10) > ${params.min_read_length} || \$1 ~ /^@/' > out.SAM
            samtools view -bh --threads ${task.cpus} out.SAM > out.tmp.bam
            rm out.SAM
            # Sort alignment
            sambamba sort -m "${task.memory.toGiga()}GB" \\
                --nthreads=${task.cpus*params.threads_per_core} \\
                --tmpdir=. \\
                --out=${slug}.bam \\
                out.tmp.bam
            sambamba index --nthreads=${task.cpus*params.threads_per_core} ${slug}.bam
            rm out.tmp.bam
            """
        else if (method == 9)
            // NOTE(review): as written this runs the same commands as method 8;
            // the only difference is that out.SAM is removed at the very end
            // instead of right after the BAM conversion.
            """
            # BWA Intermediate; Samtools pipe to sambamba
            bwa mem -t ${task.cpus} \\
                -R "${readGroup}" \\
                ${params.genome} ${fq1} ${fq2} | \\
            awk 'length(\$10) > ${params.min_read_length} || \$1 ~ /^@/' > out.SAM
            samtools view -bh --threads ${task.cpus} out.SAM > out.tmp.bam
            # Sort alignment
            sambamba sort -m "${task.memory.toGiga()}GB" \\
                --nthreads=${task.cpus*params.threads_per_core} \\
                --tmpdir=. \\
                --out=${slug}.bam \\
                out.tmp.bam
            sambamba index --nthreads=${task.cpus*params.threads_per_core} ${slug}.bam
            rm out.tmp.bam out.SAM
            """
        else
            // Fail loudly instead of returning no script for an unknown method id.
            throw new IllegalArgumentException("Unknown alignment method: ${method}")
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment