Last active
November 8, 2017 14:46
-
-
Save lpenguin/47ee505d2b6328e6ef8dba2badabe9ee to your computer and use it in GitHub Desktop.
Snakemake
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
bowtie2.bin: /srv/common/bin/bowtie2 | |
bowtie2-build.bin: /srv/common/bin/bowtie2-build | |
bowtie2.additional.params: -k 1 | |
bowtie2.threads: 20 | |
bowtie2.index: <some path to index> | |
samtools.bin: /srv/common/bin/samtools | |
genome_coverage.bin: /srv/common/bin/genomeCoverageBed |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# --drmaa ' -pe make {threads}': how to pass required number of threads to grid engine scheduler | |
# -k: do not stop if one of jobs fail | |
# -p: print exec'ed command | |
# --latency-wait 120: wait for file timeout (FS syncronisation issue) | |
# -j: maximum amount of simultanious jobs | |
# all: - target rule name | |
snakemake --drmaa-log-dir drmaa_log \ | |
--drmaa ' -pe make {threads}' \ | |
-k \ | |
-p \ | |
--latency-wait 120 \ | |
-j 18 \ | |
all |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from os.path import exists | |
configfile: 'config.yml' | |
BOWTIE2_BIN = config['bowtie2.bin'] | |
BOWTIE2_INDEX = config['bowtie2.index'] | |
BOWTIE2_THREADS = config['bowtie2.threads'] | |
SAMTOOLS_BIN = config['samtools.bin'] | |
GENOME_COVERAGE_BIN = config['genome_coverage.bin'] | |
SAMPLES, EXT = glob_wildcards('data/reads/{sample}.R1.{ext}') | |
SAMPLES = list(set(SAMPLES)) | |
ruleorder: _bowtie2_paired_and_single > _bowtie2_paired | |
def multiple_exts(template, *extensions): | |
def wrapped_input(wildcards): | |
for ext in extensions: | |
p = template.format(**wildcards) + ext | |
if exists(p): | |
return p | |
return p | |
return wrapped_input | |
# TODO: pipe intermediate results | |
# # map reads and sort bam file | |
# bowtie2 -x refgenome --no-unal -U ${WGS_SAMPLE} -S - -p 12 | \ | |
# samtools view -bS - | \ | |
# samtools sort -m 5G - mapping_result_sorted.bam | |
# -U ${WGS_SAMPLE} consider all reads as unpaired reads | |
# -S - bowtie output in SAM format, write to standard out | |
# -x refgenome use reference genome bowtie2-database | |
# --no-unal suppress SAM records for reads that failed to align | |
# -p 12 use 12 processors | |
localrules: _bam_sort, _bam_index, _coverage, _reads_count | |
rule all: | |
input: | |
cov=expand("data/coverage/{sample}.coverage.txt", sample=SAMPLES), | |
counts=expand("data/count/{sample}.count.txt", sample=SAMPLES), | |
rule _bowtie2_paired_and_single: | |
input: | |
r1=multiple_exts("data/reads/{sample}.R1", '.fastq', '.fastq.gz', '.fq', '.fq.gz'), | |
r2=multiple_exts("data/reads/{sample}.R2", '.fastq', '.fastq.gz', '.fq', '.fq.gz'), | |
s=multiple_exts("data/reads/{sample}.S", '.fastq', '.fastq.gz', '.fq', '.fq.gz'), | |
threads: min(20, BOWTIE2_THREADS) | |
output: | |
sam=temp("data/sam/{sample}.sam"), | |
log=temp("log/bowtie2/{sample}_R12S.log"), | |
log: "log/bowtie2/{sample}_R12S.log" | |
shell: | |
"{BOWTIE2_BIN} \ | |
{config[bowtie2.additional.params]} \ | |
-x {BOWTIE2_INDEX} \ | |
-S {output.sam} \ | |
--threads {threads} \ | |
-1 {input.r1} -2 {input.r2} -U {input.s} \ | |
--no-unal \ | |
>{log} 2>&1 \ | |
" | |
rule _bowtie2_paired: | |
input: | |
r1=multiple_exts("data/reads/{sample}.R1", '.fastq', '.fastq.gz', '.fq', '.fq.gz'), | |
r2=multiple_exts("data/reads/{sample}.R2", '.fastq', '.fastq.gz', '.fq', '.fq.gz'), | |
threads: min(20, BOWTIE2_THREADS) | |
output: | |
sam=temp("data/sam/{sample}.sam"), | |
log=temp("log/bowtie2/{sample}_R12S.log"), | |
log: "log/bowtie2/{sample}_R12S.log" | |
shell: | |
"{BOWTIE2_BIN} \ | |
{config[bowtie2.additional.params]} \ | |
-x {BOWTIE2_INDEX} \ | |
-S {output.sam} \ | |
--threads {threads} \ | |
-1 {input.r1} -2 {input.r2} \ | |
--no-unal \ | |
>{log} 2>&1 \ | |
" | |
rule _reads_count: | |
input: "log/bowtie2/{sample}_R12S.log" | |
output: "data/count/{sample}.count.txt" | |
shell: | |
"head {input} -n 1 | awk '{{print $1}}' > {output}" | |
rule _bam_convert: | |
input: "data/sam/{sample}.sam" | |
output: temp("data/bam/{sample}.bam") | |
threads: 5 | |
log: "log/bam_convert/{sample}.log" | |
shell: | |
"{SAMTOOLS_BIN} view -@ {threads} -bS {input} -o {output} >{log} 2>&1" | |
rule _bam_sort: | |
input: "data/bam/{sample}.bam" | |
output: "data/bam_sorted/{sample}.bam" | |
params: | |
output="data/bam_sorted/{sample}" | |
log: "log/bam_sort/{sample}.log" | |
shell: | |
"{SAMTOOLS_BIN} sort {input} {params.output} >{log} 2>&1" | |
rule _bam_index: | |
input: "data/bam_sorted/{sample}.bam" | |
output: "data/bam_sorted/{sample}.bam.bai" | |
log: "log/bam_index/{sample}.log" | |
shell: | |
"{SAMTOOLS_BIN} index {input} >{log} 2>&1" | |
rule _coverage: | |
input: | |
bam="data/bam_sorted/{sample}.bam", | |
bai="data/bam_sorted/{sample}.bam.bai" | |
output: "data/coverage/{sample}.coverage.txt" | |
shell: | |
"{GENOME_COVERAGE_BIN} -bga -ibam {input.bam} > {output}" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Log file is described twice: first and second. Moreover the first time is described as temp file. Is it how supposed to be?