Skip to content

Instantly share code, notes, and snippets.

@mikelove
Last active January 29, 2024 20:17
Show Gist options
  • Save mikelove/5a8134e57f652f970f1a176efc900cbe to your computer and use it in GitHub Desktop.
Save mikelove/5a8134e57f652f970f1a176efc900cbe to your computer and use it in GitHub Desktop.
my Salmon Snakemake file
#!/bin/bash
#
# Slurm batch script that starts the Snakemake controller process.
# The #SBATCH lines below request resources for this controller job only;
# the workflow jobs get their own resources from the sbatch command passed
# to --cluster further down.
# NOTE(review): --time and --mem are given as bare numbers; Slurm normally
# reads these as minutes and megabytes — confirm against the site config.
#SBATCH --job-name=snake
#SBATCH --time=240
#SBATCH --mem=1000
module load python
# -j 4 caps the number of simultaneously submitted jobs, --latency-wait 30
# tolerates slow shared-filesystem visibility of outputs, and every job is
# submitted with the quoted sbatch command (12 tasks on one node).
# NOTE(review): --cluster was dropped in Snakemake 8 in favour of executor
# plugins — confirm the version provided by `module load python`.
snakemake -j 4 --latency-wait 30 --cluster "sbatch -n 12 -N 1 --mem=10000 --time 60"
# Locations of the annotation directory and the Salmon binary.
ANNO = "/proj/milovelab/anno"
SALMON = "/proj/milovelab/bin/salmon-1.4.0_linux_x86_64/bin/salmon"

# Run identifiers, discovered from the read-1 FASTQ files under fastq/.
RUNS = glob_wildcards("fastq/{run}_1.fastq.gz").run
# Default target: the Salmon quantification table of every discovered run.
rule all:
    input:
        expand(
            "quants/{run}/quant.sf",
            run=RUNS,
        )
# Build the Salmon index from the GENCODE transcript FASTA.
#
# The paths are assembled from the ANNO constant in Python. Writing
# "{ANNO}/..." inside an input/output string does not substitute the global:
# Snakemake parses it as a wildcard named ANNO, so the rule would match any
# directory prefix. Globals are only interpolated inside shell commands
# (as with {SALMON} below).
rule salmon_index:
    input:
        ANNO + "/gencode.vXYZ.transcripts.fa.gz"
    output:
        directory(ANNO + "/gencode.vXYZ-salmon_1.4.0")
    shell:
        "{SALMON} index --gencode -p 12 -t {input} -i {output}"
# Quantify one paired-end sample with Salmon against the prebuilt index.
#
# The index path is derived from ANNO so it cannot drift from the
# annotation directory configured at the top of the file.
# NOTE(review): RUNS is globbed from fastq/ while the reads are taken from
# /pine/scr/m/i/milove/ — confirm both locations hold the same files.
rule salmon_quant:
    input:
        r1 = "/pine/scr/m/i/milove/{sample}_1.fastq.gz",
        r2 = "/pine/scr/m/i/milove/{sample}_2.fastq.gz",
        index = ANNO + "/gencode.vXYZ-salmon_1.4.0"
    output:
        "quants/{sample}/quant.sf"
    params:
        # Salmon takes the output directory, not the quant.sf file itself.
        dir = "quants/{sample}"
    shell:
        "{SALMON} quant -i {input.index} -l A -p 12 --gcBias "
        "--numGibbsSamples 20 --thinningFactor 100 "
        "-o {params.dir} -1 {input.r1} -2 {input.r2}"
# Locations of the annotation directory and the Salmon binary.
ANNO = "/proj/milovelab/anno"
SALMON = "/proj/milovelab/bin/salmon-1.4.0_linux_x86_64/bin/salmon"

# Mate suffixes of the paired-end FASTQ files.
READS = ["1", "2"]

# Run identifiers, discovered from the read-1 FASTQ files under fastq/.
RUNS = glob_wildcards("fastq/{run}_1.fastq.gz").run
# Default target: the aggregated MultiQC report.
rule all:
    input:
        "multiqc/multiqc_report.html"
# Build the Salmon index from the GENCODE v38 transcript FASTA.
#
# The paths are assembled from the ANNO constant in Python. Writing
# "{ANNO}/..." inside an input/output string does not substitute the global:
# Snakemake parses it as a wildcard named ANNO, so the rule would match any
# directory prefix. Globals are only interpolated inside shell commands
# (as with {SALMON} below).
rule salmon_index:
    input:
        ANNO + "/gencode.v38.transcripts.fa.gz"
    output:
        directory(ANNO + "/gencode.v38-salmon_1.4.0")
    shell:
        "{SALMON} index --gencode -p 12 -t {input} -i {output}"
# Quantify one paired-end sample with Salmon against the prebuilt index.
#
# The index path is derived from ANNO so it cannot drift from the
# annotation directory configured at the top of the file.
rule salmon_quant:
    input:
        r1 = "fastq/{sample}_1.fastq.gz",
        r2 = "fastq/{sample}_2.fastq.gz",
        index = ANNO + "/gencode.v38-salmon_1.4.0"
    output:
        "quants/{sample}/quant.sf"
    params:
        # Salmon takes the output directory, not the quant.sf file itself.
        dir = "quants/{sample}"
    shell:
        "{SALMON} quant -i {input.index} -l A -p 12 --gcBias "
        "--numGibbsSamples 20 --thinningFactor 100 "
        "-o {params.dir} -1 {input.r1} -2 {input.r2}"
# Run FastQC on a single FASTQ file; the report is written to qc/<sample>/.
rule fastqc:
    input:
        "fastq/{sample}.fastq.gz"
    output:
        "qc/{sample}/{sample}_fastqc.html"
    params:
        # FastQC is given the output directory rather than the report path.
        dir = "qc/{sample}"
    shell:
        "fastqc --quiet -t 12 "
        "--outdir {params.dir} {input}"
# Aggregate all Salmon and FastQC results into a single MultiQC report.
rule multiqc:
    input:
        # Two separate expand() calls so that `read` is only applied to the
        # pattern that actually contains it.
        expand("quants/{run}/quant.sf", run=RUNS),
        expand("qc/{run}_{read}/{run}_{read}_fastqc.html",
               run=RUNS, read=READS)
    output:
        "multiqc/multiqc_report.html"
    shell:
        # --force: on a re-run the old multiqc_data directory still exists;
        # without it MultiQC writes multiqc_report_1.html instead and the
        # declared output is never produced.
        "multiqc . --force -o multiqc"
# Download both mates of every run listed in `x` into the scratch directory.
# NOTE(review): `x` is not defined in this script; it must already exist in
# the session and provide a `Run` column — confirm against the caller.
scratch <- "/pine/scr/m/i/milove"
# see https://gist.github.com/mikelove/f539631f9e187a8931d34779436a1c01 for accession2url() definition
source("https://gist.githubusercontent.com/mikelove/f539631f9e187a8931d34779436a1c01/raw/6e6633aa5123358b70390ab738be1eef03a3df31/accession2url.R")
# seq_len() gives an empty sequence when x has zero rows, whereas
# 1:nrow(x) would iterate over c(1, 0) and index non-existent rows.
for (i in seq_len(nrow(x))) {
  print(paste("---",i,"---"))
  run <- x$Run[i]
  for (read in 1:2) {
    file <- paste0(run,"_",read,".fastq.gz")
    url <- file.path(accession2url(run), file)
    dest <- file.path(scratch, file)
    # Skip files already present so the script can be re-run after an
    # interruption without downloading everything again.
    if (!file.exists(dest)) {
      # mode = "wb": the files are gzip archives and must be written as
      # binary on every platform.
      download.file(url, dest, mode = "wb")
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment