Created
November 20, 2024 21:46
-
-
Save robsyme/a830d29046082e16b0fb26b6055e2cb2 to your computer and use it in GitHub Desktop.
Example Genotyping script using new workflow output
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env nextflow | |
| nextflow.preview.output = true | |
| /* | |
| * Pipeline parameters | |
| */ | |
| // Primary input (file of input files, one per line) | |
| params.reads_bam = "${projectDir}/data/sample_bams.txt" | |
| // Output directory | |
| params.outdir = "results_genomics" | |
| // Accessory files | |
| params.reference = "${projectDir}/data/ref/ref.fasta" | |
| params.reference_index = "${projectDir}/data/ref/ref.fasta.fai" | |
| params.reference_dict = "${projectDir}/data/ref/ref.dict" | |
| params.intervals = "${projectDir}/data/ref/intervals.bed" | |
| params.cohort_name = "family_trio" | |
| /* | |
| * Generate BAM index file | |
| */ | |
| process SAMTOOLS_INDEX { | |
| container 'community.wave.seqera.io/library/samtools:1.20--b5dfbd93de237464' | |
| // publishDir params.outdir, mode: 'symlink' | |
| input: | |
| path input_bam | |
| output: | |
| tuple path(input_bam), path("${input_bam}.bai") | |
| script: | |
| """ | |
| samtools index '$input_bam' | |
| """ | |
| } | |
| /* | |
| * Call variants with GATK HaplotypeCaller | |
| */ | |
| process GATK_HAPLOTYPECALLER { | |
| container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867" | |
| publishDir params.outdir, mode: 'symlink' | |
| input: | |
| tuple path(input_bam), path(input_bam_index) | |
| path ref_fasta | |
| path ref_index | |
| path ref_dict | |
| path interval_list | |
| output: | |
| path "*.vcf" , emit: vcf | |
| path "*.vcf.idx" , emit: idx | |
| script: | |
| """ | |
| gatk HaplotypeCaller \ | |
| -R ${ref_fasta} \ | |
| -I ${input_bam} \ | |
| -O ${input_bam}.g.vcf \ | |
| -L ${interval_list} \ | |
| -ERC GVCF | |
| """ | |
| } | |
| /* | |
| * Combine GVCFs into GenomicsDB datastore | |
| */ | |
| process GATK_JOINTGENOTYPING { | |
| container "community.wave.seqera.io/library/gatk4:4.5.0.0--730ee8817e436867" | |
| publishDir params.outdir, mode: 'copy' | |
| input: | |
| path all_gvcfs | |
| path all_idxs | |
| path interval_list | |
| val cohort_name | |
| path ref_fasta | |
| path ref_index | |
| path ref_dict | |
| output: | |
| path "${cohort_name}.joint.vcf", emit: vcf | |
| path "${cohort_name}.joint.vcf.idx", emit: idx | |
| script: | |
| def gvcfs_line = all_gvcfs.collect { gvcf -> "-V $gvcf" }.join(' ') | |
| """ | |
| gatk GenomicsDBImport \ | |
| ${gvcfs_line} \ | |
| -L ${interval_list} \ | |
| --genomicsdb-workspace-path ${cohort_name}_gdb | |
| gatk GenotypeGVCFs \ | |
| -R ${ref_fasta} \ | |
| -V gendb://${cohort_name}_gdb \ | |
| -L ${interval_list} \ | |
| -O ${cohort_name}.joint.vcf | |
| """ | |
| } | |
| workflow { | |
| main: | |
| // Create input channel from a text file listing input file paths | |
| reads_ch = Channel.fromPath(params.reads_bam).splitText() | |
| // Load the file paths for the accessory files (reference and intervals) | |
| ref_file = file(params.reference) | |
| ref_index_file = file(params.reference_index) | |
| ref_dict_file = file(params.reference_dict) | |
| intervals_file = file(params.intervals) | |
| // Create index file for input BAM file | |
| SAMTOOLS_INDEX(reads_ch) | |
| // Call variants from the indexed BAM file | |
| GATK_HAPLOTYPECALLER( | |
| SAMTOOLS_INDEX.out, | |
| ref_file, | |
| ref_index_file, | |
| ref_dict_file, | |
| intervals_file | |
| ) | |
| // Collect variant calling outputs across samples | |
| all_gvcfs_ch = GATK_HAPLOTYPECALLER.out.vcf | |
| .dump(tag:"beforeCollect") | |
| .collect() | |
| .dump(tag:"afterCollect") | |
| all_idxs_ch = GATK_HAPLOTYPECALLER.out.idx.collect() | |
| // Combine GVCFs into a GenomicsDB datastore | |
| GATK_JOINTGENOTYPING( | |
| all_gvcfs_ch, | |
| all_idxs_ch, | |
| intervals_file, | |
| params.cohort_name, | |
| ref_file, | |
| ref_index_file, | |
| ref_dict_file | |
| ) | |
| GATK_JOINTGENOTYPING.out.vcf.dump(tag: "foo") | |
| publish: | |
| GATK_JOINTGENOTYPING.out.vcf >> 'jointGenotyped' | |
| } | |
| output { | |
| 'jointGenotyped' { | |
| path 'vcfs' | |
| mode 'copy' | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment