Created
February 9, 2017 16:30
-
-
Save jfear/f2614604386275fd0a66fe0a197d22dd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# vim: set ft=snakemake: | |
shell.prefix("""set -euo pipefail; | |
if [ -e /lscratch/$SLURM_JOBID ]; then | |
export TMPDIR=/lscratch/$SLURM_JOBID | |
else | |
export TMPDIR=/tmp | |
fi; | |
""") | |
workdir: '.' | |
################################################################################ | |
# Config | |
################################################################################ | |
sra_file = '/data/Oliverlab/project/remap/data/sra_info_dmel_13050.txt' | |
BAM_dir = '/data/Oliverlab/project/remap/result/pre_alignment' | |
hisat2_version = '2.0.4' | |
stringtie_version = '1.2.3' | |
samtools_version = '1.3' | |
rseqc_version = '2.6.3' | |
splice_file = '/data/Oliverlab/data/FlyBase/FB2015_04/dmel_ERCC.splicesites.txt' | |
hisat2_index = '/data/Oliverlab/data/FlyBase/FB2015_04/dmel_ERCC' | |
bed12_file = '/data/Oliverlab/data/FlyBase/FB2015_04/dmel_ERCC.bed12' | |
################################################################################ | |
# Sample Information | |
################################################################################ | |
import pandas as pd | |
sras = pd.read_csv(sra_file, sep='\t', header=None) | |
#ALL_SAMPLES = sras.iloc[:,0] | |
ALL_SAMPLES = sras.iloc[:1000,0] | |
################################################################################ | |
# Targets | |
################################################################################ | |
ALL_BAM = expand("{path}/{sample}/{sample}.bam", path = BAM_dir, sample = ALL_SAMPLES) | |
ALL_EXP = expand("{path}/{sample}/{sample}.exp", path = BAM_dir, sample = ALL_SAMPLES) | |
ALL_BAMSTAT = expand("{path}/{sample}/{sample}.bam_stat", path = BAM_dir, sample = ALL_SAMPLES) | |
ALL_TIN = expand("{path}/{sample}/{sample}.summary.txt", path = BAM_dir, sample = ALL_SAMPLES) | |
ALL_COVERAGE = expand("{path}/{sample}/{sample}.geneBodyCoverage.r", path = BAM_dir, sample = ALL_SAMPLES) | |
#ALL_HTSEQCOUNT = expand(config["BAM_dir"] + "/{sample}/{sample}.htseq_count", sample = ALL_SAMPLES) | |
rule all: | |
input: ALL_BAM + ALL_EXP + ALL_BAMSTAT + ALL_TIN + ALL_COVERAGE | |
# input: ALL_BAM + ALL_EXP + ALL_BAMSTAT + ALL_TIN + ALL_COVERAGE + ALL_LOG + ALL_GTF + ALL_HTSEQCOUNT | |
localrules: all | |
################################################################################ | |
# Rules | |
################################################################################ | |
rule hisat2: | |
output: bam = '{prefix}/{sample}.bam', | |
bai = '{prefix}/{sample}.bam.bai' | |
log: "{prefix}/{sample}.log" | |
threads: 6 | |
params: | |
hisat_version = hisat2_version, | |
samtools_version = samtools_version, | |
hisat_index = hisat2_index, | |
sra = '{sample}', | |
splice_file = splice_file | |
shell: | |
""" | |
module load hisat/{params.hisat_version} | |
module load samtools/{params.samtools_version} | |
# Stage Index | |
if [ ! -e $TMPDIR/references ]; then | |
mkdir $TMPDIR/references | |
cp {params.hisat_index}*.ht2 $TMPDIR/references/ | |
fi | |
REF=$TMPDIR/references/$(basename {params.hisat_index}) | |
# Run Hisat2 | |
hisat2 -p {threads} \\ | |
-x $REF \\ | |
--dta \\ | |
--sra-acc {params.sra} \\ | |
--known-splicesite-infile {params.splice_file} 2> {log} | samtools view -b | \\ | |
samtools sort - \\ | |
-T $TMPDIR/{wildcards.sample}.sort \\ | |
-o $TMPDIR/{wildcards.sample}.bam | |
samtools index $TMPDIR/{wildcards.sample}.bam | |
cp $TMPDIR/{wildcards.sample}.bam {output.bam} | |
cp $TMPDIR/{wildcards.sample}.bam.bai {output.bai} | |
""" | |
rule rseqc: | |
input: '{prefix}/{sample}.bam' | |
output: infer_exp = '{prefix}/{sample}.exp', | |
bam_stat = '{prefix}/{sample}.bam_stat', | |
tin_txt = '{prefix}/{sample}.summary.txt', | |
tin_tsv = '{prefix}/{sample}.tin.tsv', | |
coverage = '{prefix}/{sample}.geneBodyCoverage.r' | |
params: | |
rseqc_version = rseqc_version, | |
bed12_file = bed12_file, | |
prefix = '{prefix}/{sample}' | |
shell: | |
""" | |
module load rseqc/{params.rseqc_version} | |
module load R | |
# Move to TMPDIR | |
cd $TMPDIR | |
BED12=$TMPDIR/$(basename {params.bed12_file}) | |
if [ ! -e $BED12 ]; then | |
cp {params.bed12_file} $BED12 | |
fi | |
BAM=$TMPDIR/$(basename {input[0]}) | |
if [ ! -e $BAM ]; then | |
cp {input[0]} $BAM | |
cp {input[0]}.bai $BAM.bai | |
fi | |
# Infer Experiment | |
infer_experiment.py -i $BAM -r $BED12 > {output.infer_exp} | |
# BAM stat | |
bam_stat.py -i $BAM > {output.bam_stat} | |
# TIN | |
tin.py -i $BAM -r $BED12 | |
cp {wildcards.sample}.summary.txt {output.tin_txt} | |
cp {wildcards.sample}.tin.xls {output.tin_tsv} | |
# Coverage | |
geneBody_coverage.py -i $BAM -r $BED12 -o {params.prefix} | |
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment