Created
October 23, 2025 21:34
-
-
Save clintval/5c4b1d0295f4da1195ac89ee80c59b04 to your computer and use it in GitHub Desktop.
A shitty exome sequencing pipeline
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| rule all: | |
| input: | |
| ####################################################################### | |
| # | |
| # Pipeline #1 Flow cell QC and Demultiplexing | |
| # | |
| ####################################################################### | |
| # check_illumina_directory | |
| f'{run_output}/logs/CheckIlluminaDirectory.log', | |
| # collect_illumina_lane_metrics | |
| f'{run_output}/basecalling/{run_barcode}.illumina_lane_metrics.txt', | |
| f'{run_output}/basecalling/{run_barcode}.illumina_phasing_metrics.txt', | |
| # extract_illumina_barcodes | |
| expand(f'{run_output}/basecalling/barcodes.{{lane}}/barcode_counts.lane-{{lane}}.metrics.txt', lane=lanes), | |
| # collect_illumina_basecalling_metrics | |
| expand(f'{run_output}/basecalling/basecalling.lane-{{lane}}.metrics.txt', lane=lanes), | |
| # collect_quality_yield_metrics | |
| expand(f'{run_output}/{{name}}/{{name}}.unmapped.quality_yield_metrics.txt', name=sample_names), | |
| ####################################################################### | |
| # | |
| # Pipeline #2 Alignment | |
| # | |
| ####################################################################### | |
| # align_with_bwa_mem, sort_sam_files, mark_duplicates | |
| expand(f'{run_output}/{{name}}/{{name}}.mapped.bam', name=sample_names), | |
| expand(f'{run_output}/{{name}}/{{name}}.mapped.duplicate_metrics.txt', name=sample_names), | |
| ####################################################################### | |
| # | |
| # Pipeline #3 QC Mapped BAM | |
| # | |
| ####################################################################### | |
| # collect_alignment_summary_metrics | |
| expand(f'{run_output}/{{name}}/{{name}}.mapped.alignment_summary_metrics.txt', name=sample_names), | |
| # collect_hs_metrics | |
| expand(f'{run_output}/{{name}}/{{name}}.mapped.hybrid_selection_metrics.txt', name=sample_names), | |
| expand(f'{run_output}/{{name}}/{{name}}.mapped.hybrid_selection_metrics.per_target.txt', name=sample_names), | |
| # collect_insert_size_metrics | |
| expand(f'{run_output}/{{name}}/{{name}}.mapped.insert_size_metrics.txt', name=sample_names), | |
| expand(f'{run_output}/{{name}}/{{name}}.mapped.insert_size_metrics.pdf', name=sample_names), | |
| # collect_sequencing_artifact_metrics | |
| expand(f'{run_output}/{{name}}/{{name}}.mapped.bait_bias_detail_metrics.txt', name=sample_names), | |
| expand(f'{run_output}/{{name}}/{{name}}.mapped.bait_bias_summary_metrics.txt', name=sample_names), | |
| expand(f'{run_output}/{{name}}/{{name}}.mapped.error_summary_metrics.txt', name=sample_names), | |
| expand(f'{run_output}/{{name}}/{{name}}.mapped.pre_adapter_detail_metrics.txt', name=sample_names), | |
| expand(f'{run_output}/{{name}}/{{name}}.mapped.pre_adapter_summary_metrics.txt', name=sample_names), | |
| # collect_base_distribution_by_cycle | |
| expand(f'{run_output}/{{name}}/{{name}}.mapped.base_distribution_by_cycle.txt', name=sample_names), | |
| expand(f'{run_output}/{{name}}/{{name}}.mapped.base_distribution_by_cycle.pdf', name=sample_names), | |
| # collect_quality_score_distribution | |
| expand(f'{run_output}/{{name}}/{{name}}.mapped.quality_score_distribution.txt', name=sample_names), | |
| expand(f'{run_output}/{{name}}/{{name}}.mapped.quality_score_distribution.pdf', name=sample_names), | |
| # collect_gc_bias_metrics | |
| expand(f'{run_output}/{{name}}/{{name}}.mapped.gc_bias_metrics.txt', name=sample_names), | |
| rule create_basecalling_params: | |
| output: | |
| barcode_params=f'{run_output}/basecalling/barcode_params.{{lane}}.txt', | |
| library_params=f'{run_output}/basecalling/library_params.{{lane}}.txt' | |
| run: sample_sheet.to_picard_basecalling_params( | |
| directory=f'{run_output}/basecalling/', | |
| bam_prefix=run_output, | |
| lanes=lanes) | |
| rule check_illumina_directory: | |
| input: basecalls_dir | |
| output: f'{run_output}/logs/CheckIlluminaDirectory.log' | |
| priority: 100 | |
| params: read_structure=read_structure, lanes=lanes | |
| log: f'{run_output}/logs/CheckIlluminaDirectory.log' | |
| wrapper: f'{wrapper_uri}/picard_check_illumina_directory' | |
| rule collect_illumina_lane_metrics: | |
| input: run_folder | |
| output: | |
| lane_metrics=f'{run_output}/basecalling/{run_barcode}.illumina_lane_metrics.txt', | |
| phasing_metrics=f'{run_output}/basecalling/{run_barcode}.illumina_phasing_metrics.txt' | |
| params: | |
| read_structure=read_structure, | |
| file_extension='.txt' | |
| log: f'{run_output}/logs/CollectIlluminaLaneMetrics.{run_barcode}.log' | |
| wrapper: f'{wrapper_uri}/picard_collect_illumina_lane_metrics' | |
| rule extract_illumina_barcodes: | |
| input: | |
| basecalls_dir=basecalls_dir, | |
| barcode_file=rules.create_basecalling_params.output.barcode_params | |
| output: | |
| metrics_file=f'{run_output}/basecalling/barcodes.{{lane}}/barcode_counts.lane-{{lane}}.metrics.txt', | |
| barcodes_dir=f'{run_output}/basecalling/barcodes.{{lane}}' | |
| threads: max(1, int(threads / len(lanes))) | |
| params: | |
| lane='{lane}', | |
| compress_outputs=True, | |
| read_structure=read_structure | |
| log: f'{run_output}/logs/ExtractIlluminaBarcodes.{{lane}}.log' | |
| wrapper: f'{wrapper_uri}/picard_extract_illumina_barcodes' | |
| rule collect_illumina_basecalling_metrics: | |
| input: | |
| basecalls_dir=basecalls_dir, | |
| barcodes_dir=rules.extract_illumina_barcodes.output.barcodes_dir, | |
| barcode_file=rules.create_basecalling_params.output | |
| output: f'{run_output}/basecalling/basecalling.lane-{{lane}}.metrics.txt', | |
| params: | |
| lane='{lane}', | |
| read_structure=read_structure | |
| log: f'{run_output}/logs/CollectIlluminaBasecallingMetrics.{{lane}}.log' | |
| wrapper: f'{wrapper_uri}/picard_collect_illumina_basecalling_metrics' | |
| rule illumina_basecalls_to_sam: | |
| input: | |
| basecalls_dir=basecalls_dir, | |
| barcodes_dir=rules.extract_illumina_barcodes.output.barcodes_dir, | |
| library_params=rules.create_basecalling_params.output.library_params | |
| output: list_unmerged_sample_bam() | |
| threads: max(1, int(threads / len(lanes))) | |
| resources: | |
| gc_time_limit=50, | |
| gc_heap_free_limit=10, | |
| malloc=lambda wildcards, attempt: int(attempt * 1.5 * 8192), | |
| samjdk_buffer_size=131072, | |
| use_async_io_read_samtools=1, | |
| use_async_io_write_samtools=1 | |
| params: | |
| lane='{lane}', | |
| run_barcode=run_barcode, | |
| sequencing_center=sequencing_center, | |
| run_start_date=run_start_date, | |
| read_structure=read_structure, | |
| adapters_to_check=['INDEXED', 'NEXTERA_V2', 'FLUIDIGM'], | |
| include_non_pf_reads=False | |
| log: f'{run_output}/logs/IlluminaBasecallsToSam.{{lane}}.log' | |
| wrapper: f'{wrapper_uri}/picard_illumina_basecalls_to_sam' | |
| rule collect_quality_yield_metrics: | |
| input: list_unmapped_bams_for_sample_name | |
| output: f'{run_output}/{{name}}/{{name}}.unmapped.quality_yield_metrics.txt' | |
| params: | |
| assume_sorted=True, | |
| validation_stringency='SILENT' | |
| log: f'{run_output}/logs/CollectQualityYieldMetrics.{{name}}.log' | |
| wrapper: f'{wrapper_uri}/picard_collect_quality_yield_metrics' | |
| rule merge_sam_files: | |
| input: list_unmapped_bams_for_sample_name | |
| output: temp(f'{run_output}/{{name}}/{{name}}.unmapped.bam') | |
| resources: | |
| samjdk_buffer_size=131072, | |
| use_async_io_read_samtools=1, | |
| use_async_io_write_samtools=1 | |
| params: | |
| sort_order='coordinate', | |
| use_threading=True | |
| log: f'{run_output}/logs/MergeSameFiles.{{name}}.log' | |
| wrapper: f'{wrapper_uri}/picard_merge_sam_files' | |
| rule align_with_bwa_mem: | |
| input: | |
| reference=reference, | |
| unmapped_bam=rules.merge_sam_files.output | |
| output: f'{run_output}/{{name}}/{{name}}.mapped.raw.bam' | |
| threads: max(1, int(threads / len(lanes))) | |
| params: | |
| sam_to_fastq={ | |
| 'clipping_action': 'N', | |
| 'clipping_attribute': 'XT', | |
| 'clipping_min_length': 25, | |
| 'interleave': True, | |
| 'include_non_pf_reads': False}, | |
| bwa_mem={'p': True, 'v': 2}, | |
| merge_bam_alignment={ | |
| 'aligner_proper_pair_flags': False, | |
| 'attributes_to_retain': ['X0', 'ZS', 'ZI', 'ZM', 'ZC', 'ZN'], | |
| 'clip_adapters': False, | |
| 'expected_orientations': 'FR', | |
| 'max_insertions_or_deletions': -1, | |
| 'sort_order': 'coordinate'} | |
| log: f'{run_output}/logs/AlignWithBwaMem.{{name}}.log' | |
| wrapper: f'{wrapper_uri}/bwa_mem' | |
| rule mark_duplicates: | |
| input: rules.align_with_bwa_mem.output | |
| output: | |
| bam=f'{run_output}/{{name}}/{{name}}.mapped.bam', | |
| metrics=f'{run_output}/{{name}}/{{name}}.mapped.duplicate_metrics.txt' | |
| params: create_index=True | |
| log: f'{run_output}/logs/MarkDuplicates.{{name}}.log' | |
| wrapper: f'{wrapper_uri}/picard_mark_duplicates' | |
| rule collect_hs_metrics: | |
| input: | |
| reference=reference, | |
| bam=rules.mark_duplicates.output.bam, | |
| bait_intervals=bait_intervals, | |
| output: | |
| summary_output=f'{run_output}/{{name}}/{{name}}.mapped.hybrid_selection_metrics.txt', | |
| per_target_coverage=f'{run_output}/{{name}}/{{name}}.mapped.hybrid_selection_metrics.per_target.txt', | |
| resources: | |
| gc_time_limit=50, | |
| gc_heap_free_limit=10, | |
| malloc=lambda wildcards, attempt: int(attempt * 1.5 * 4096), | |
| samjdk_buffer_size=131072 | |
| log: f'{run_output}/logs/CollectHsMetrics.{{name}}.log' | |
| wrapper: f'{wrapper_uri}/picard_collect_hs_metrics' | |
| rule alignment_summary_metrics: | |
| input: | |
| reference=reference, | |
| bam=rules.mark_duplicates.output.bam | |
| output: f'{run_output}/{{name}}/{{name}}.mapped.alignment_summary_metrics.txt' | |
| log: f'{run_output}/logs/CollectAlignmentSummaryMetrics.{{name}}.log' | |
| wrapper: f'{wrapper_uri}/picard_collect_alignment_summary_metrics' | |
| rule collect_insert_size_metrics: | |
| input: rules.mark_duplicates.output.bam | |
| output: | |
| metrics_file=f'{run_output}/{{name}}/{{name}}.mapped.insert_size_metrics.txt', | |
| histogram_file=f'{run_output}/{{name}}/{{name}}.mapped.insert_size_metrics.pdf' | |
| log: f'{run_output}/logs/CollectInsertSizeMetrics.{{name}}.log' | |
| wrapper: f'{wrapper_uri}/picard_collect_insert_size_metrics' | |
| rule collect_sequencing_artifact_metrics: | |
| input: | |
| reference=reference, | |
| bam=rules.mark_duplicates.output.bam | |
| output: | |
| f'{run_output}/{{name}}/{{name}}.mapped.bait_bias_detail_metrics.txt', | |
| f'{run_output}/{{name}}/{{name}}.mapped.bait_bias_summary_metrics.txt', | |
| f'{run_output}/{{name}}/{{name}}.mapped.error_summary_metrics.txt', | |
| f'{run_output}/{{name}}/{{name}}.mapped.pre_adapter_detail_metrics.txt', | |
| f'{run_output}/{{name}}/{{name}}.mapped.pre_adapter_summary_metrics.txt' | |
| params: | |
| output=f'{run_output}/{{name}}/{{name}}.mapped', | |
| file_extension='.txt' | |
| resources: | |
| malloc=lambda wildcards, attempt: int(attempt * 1.5 * 4096) | |
| log: f'{run_output}/logs/CollectSequencingArtifactMetrics.{{name}}.log' | |
| wrapper: f'{wrapper_uri}/picard_collect_sequencing_artifact_metrics' | |
| rule collect_oxo_g_metrics: | |
| input: | |
| reference=reference, | |
| bam=rules.mark_duplicates.output.bam | |
| output: f'{run_output}/{{name}}/{{name}}.mapped.oxo_g_metrics.txt' | |
| log: f'{run_output}/logs/CollectOxoGMetrics.{{name}}.log' | |
| wrapper: f'{wrapper_uri}/picard_collect_oxo_g_metrics' | |
| rule collect_base_distribution_by_cycle: | |
| input: rules.mark_duplicates.output.bam | |
| output: | |
| metrics_file=f'{run_output}/{{name}}/{{name}}.mapped.base_distribution_by_cycle.txt', | |
| chart_output=f'{run_output}/{{name}}/{{name}}.mapped.base_distribution_by_cycle.pdf' | |
| log: f'{run_output}/logs/CollectBaseDistributionByCycle.{{name}}.log' | |
| wrapper: f'{wrapper_uri}/picard_collect_base_distribution_by_cycle' | |
| rule collect_quality_score_distribution: | |
| input: rules.mark_duplicates.output.bam | |
| output: | |
| metrics_file=f'{run_output}/{{name}}/{{name}}.mapped.quality_score_distribution.txt', | |
| chart_output=f'{run_output}/{{name}}/{{name}}.mapped.quality_score_distribution.pdf' | |
| log: f'{run_output}/logs/QualityScoreDistribution.{{name}}.log' | |
| wrapper: f'{wrapper_uri}/picard_quality_score_distribution' | |
| rule collect_gc_bias_metrics: | |
| input: rules.mark_duplicates.output.bam | |
| output: | |
| metrics_file=f'{run_output}/{{name}}/{{name}}.mapped.gc_bias_metrics.txt', | |
| chart_output=f'{run_output}/{{name}}/{{name}}.mapped.gc_bias_metrics.pdf', | |
| summary_output=f'{run_output}/{{name}}/{{name}}.mapped.gc_bias_metrics.summary.txt' | |
| log: f'{run_output}/logs/CollectGcBiasMetrics.{{name}}.log' | |
| wrapper: f'{wrapper_uri}/picard_collect_gc_bias_metrics' |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment