Last active
January 17, 2020 07:55
-
-
Save lindenb/6f6513a69d508252b899cf43f119f743 to your computer and use it in GitHub Desktop.
nextflow workflow for somalier #nextflow #workflow #somalier #relatedness #samples #bam
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
params.prefix="output." | |
params.bamlist="" | |
params.sites="https://github.com/brentp/somalier/files/3412453/sites.hg19.vcf.gz" | |
params.help=false | |
params.pedigree="NO_FILE" | |
def helpMessage() { | |
log.info""" | |
========================================= | |
Usage: | |
fast sample-swap and relatedness checks on BAMs/CRAMs/VCFs/GVCFs | |
See : https://github.com/brentp/somalier | |
Mandatory arguments: | |
--prefix output prefix | |
--publishDir Output directory. | |
--reference Reference fasta file | |
--bamlist path to BAMs | |
Other options: | |
--sites URL of dowload sites (${params.sites}) | |
--pedigree original pedigree | |
Nextflow options: | |
-w Work directory used by Nextflow. | |
workflow Author: Pierre Lindenbaum @yokofakun 20200116 | |
========================================= | |
""" | |
} | |
pedIn = file(params.pedigree) | |
reference = file(params.reference) | |
bamIn = file(params.bamlist) | |
if( params.help ) { | |
helpMessage() | |
exit 0 | |
} | |
String jvarkit(String tool) { | |
return "\${JVARKIT_DIST}/" + tool + ".jar" | |
} | |
process downloadSomalier { | |
output: | |
file("somalier") into somalier_exe | |
script: | |
""" | |
wget -O somalier "https://github.com/brentp/somalier/releases/download/v0.2.7/somalier" | |
chmod +x somalier | |
""" | |
} | |
process downloadSites { | |
output: | |
file("sites.vcf.gz") into sites_vcf | |
script: | |
""" | |
wget -q -O sites.vcf.gz "${params.sites}" | |
bcftools index -t sites.vcf.gz | |
""" | |
} | |
process makeBamList { | |
tag "collect BAMs" | |
cache 'lenient' | |
input: | |
file ref from reference | |
file bams from bamIn | |
output: | |
file("sample_bam.csv") into bam_list1 | |
script: | |
""" | |
cat "${bams}" | while read B | |
do | |
samtools view --reference "${ref.toRealPath()}" -H "\${B}" | grep "^@RG" | tr "\t" "\\n" | grep ^SM: -m1 | cut -d ':' -f 2 | tr "\\n" "," >> jeter.csv | |
echo "\${B}" >> jeter.csv | |
done | |
sort -T . -t ',' -k1,1 jeter.csv > sample_bam.csv | |
cut -d, -f 1 sample_bam.csv | sort | uniq -d > dups.txt | |
test ! -s dups.txt | |
rm dups.txt | |
rm jeter.csv | |
test -s sample_bam.csv | |
""" | |
} | |
process extract { | |
tag "${sample}" | |
cache 'lenient' | |
maxForks 30 | |
memory '2g' | |
input: | |
file ref from reference | |
file somalier from somalier_exe | |
file sites from sites_vcf | |
set sample,bam from bam_list1.splitCsv(header: false,sep:',',strip:true) | |
output: | |
file("extracted/${sample}.somalier") into extracted | |
script: | |
""" | |
mkdir extracted | |
${somalier.toRealPath()} extract -d extracted --sites "${sites.toRealPath()}" -f "${ref.toRealPath()}" "${bam}" | |
test -s "extracted/${sample}.somalier" | |
""" | |
} | |
process relate { | |
tag "${L.size()} samples" | |
cache 'lenient' | |
maxForks 1 | |
memory '2g' | |
publishDir "${params.publishDir}" , mode: 'copy', overwrite: true | |
when: | |
L.size()>1 | |
input: | |
file ped from pedIn | |
file somalier from somalier_exe | |
val L from extracted.collect() | |
output: | |
file("${params.prefix}groups.tsv") | |
file("${params.prefix}html") | |
file("${params.prefix}pairs.tsv") | |
file("${params.prefix}samples.tsv") | |
script: | |
def pedarg = ped.name != 'NO_FILE' ? "-p ${ped}" : '' | |
""" | |
${somalier.toRealPath()} relate --output-prefix=${params.prefix} ${pedarg} ${L.join(" ")} | |
""" | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment