Skip to content

Instantly share code, notes, and snippets.

@lindenb
Last active January 17, 2020 07:55
Show Gist options
  • Save lindenb/6f6513a69d508252b899cf43f119f743 to your computer and use it in GitHub Desktop.
Save lindenb/6f6513a69d508252b899cf43f119f743 to your computer and use it in GitHub Desktop.
nextflow workflow for somalier #nextflow #workflow #somalier #relatedness #samples #bam
params.prefix="output."
params.bamlist=""
params.sites="https://github.com/brentp/somalier/files/3412453/sites.hg19.vcf.gz"
params.help=false
params.pedigree="NO_FILE"
def helpMessage() {
log.info"""
=========================================
Usage:
fast sample-swap and relatedness checks on BAMs/CRAMs/VCFs/GVCFs
See : https://github.com/brentp/somalier
Mandatory arguments:
--prefix output prefix
--publishDir Output directory.
--reference Reference fasta file
--bamlist path to BAMs
Other options:
--sites URL of dowload sites (${params.sites})
--pedigree original pedigree
Nextflow options:
-w Work directory used by Nextflow.
workflow Author: Pierre Lindenbaum @yokofakun 20200116
=========================================
"""
}
pedIn = file(params.pedigree)
reference = file(params.reference)
bamIn = file(params.bamlist)
if( params.help ) {
helpMessage()
exit 0
}
String jvarkit(String tool) {
return "\${JVARKIT_DIST}/" + tool + ".jar"
}
process downloadSomalier {
output:
file("somalier") into somalier_exe
script:
"""
wget -O somalier "https://github.com/brentp/somalier/releases/download/v0.2.7/somalier"
chmod +x somalier
"""
}
process downloadSites {
output:
file("sites.vcf.gz") into sites_vcf
script:
"""
wget -q -O sites.vcf.gz "${params.sites}"
bcftools index -t sites.vcf.gz
"""
}
process makeBamList {
tag "collect BAMs"
cache 'lenient'
input:
file ref from reference
file bams from bamIn
output:
file("sample_bam.csv") into bam_list1
script:
"""
cat "${bams}" | while read B
do
samtools view --reference "${ref.toRealPath()}" -H "\${B}" | grep "^@RG" | tr "\t" "\\n" | grep ^SM: -m1 | cut -d ':' -f 2 | tr "\\n" "," >> jeter.csv
echo "\${B}" >> jeter.csv
done
sort -T . -t ',' -k1,1 jeter.csv > sample_bam.csv
cut -d, -f 1 sample_bam.csv | sort | uniq -d > dups.txt
test ! -s dups.txt
rm dups.txt
rm jeter.csv
test -s sample_bam.csv
"""
}
process extract {
tag "${sample}"
cache 'lenient'
maxForks 30
memory '2g'
input:
file ref from reference
file somalier from somalier_exe
file sites from sites_vcf
set sample,bam from bam_list1.splitCsv(header: false,sep:',',strip:true)
output:
file("extracted/${sample}.somalier") into extracted
script:
"""
mkdir extracted
${somalier.toRealPath()} extract -d extracted --sites "${sites.toRealPath()}" -f "${ref.toRealPath()}" "${bam}"
test -s "extracted/${sample}.somalier"
"""
}
process relate {
tag "${L.size()} samples"
cache 'lenient'
maxForks 1
memory '2g'
publishDir "${params.publishDir}" , mode: 'copy', overwrite: true
when:
L.size()>1
input:
file ped from pedIn
file somalier from somalier_exe
val L from extracted.collect()
output:
file("${params.prefix}groups.tsv")
file("${params.prefix}html")
file("${params.prefix}pairs.tsv")
file("${params.prefix}samples.tsv")
script:
def pedarg = ped.name != 'NO_FILE' ? "-p ${ped}" : ''
"""
${somalier.toRealPath()} relate --output-prefix=${params.prefix} ${pedarg} ${L.join(" ")}
"""
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment