Skip to content

Instantly share code, notes, and snippets.

@brwnj
Created February 25, 2013 23:19
Show Gist options
  • Save brwnj/5034269 to your computer and use it in GitHub Desktop.
Save brwnj/5034269 to your computer and use it in GitHub Desktop.
old gsnap/cuffdiff
#! /usr/bin/env bash
#BSUB -J align[1-12]
#BSUB -e gsnap.%J.%I.err
#BSUB -o gsnap.%J.%I.out
#BSUB -q normal
#BSUB -R "select[mem>16] rusage[mem=16] span[hosts=1]"
#BSUB -n 4
<<DOC
align rnaseq samples using gsnap against mm9
DOC
# LSF job array: each array element aligns one sample, selected from the
# SAMPLES array (defined in experiment.cfg) by LSB_JOBINDEX.
#
# Required from experiment.cfg: SAMPLES, DATA, RESULTS, GMAPDB, KNOWNSITES,
# PICARD, REFERENCE.
# Required from the environment: LSB_JOBINDEX.
#
# Outputs, all under RESULTS/common/<sample>/:
#   <sample>.bam      sorted alignments with unmapped reads removed (-F 4)
#   <sample>.bam.bai  index written by samtools index
#   <sample>.stats.*  Picard CollectMultipleMetrics reports
set -o nounset -o pipefail -o errexit -x
source experiment.cfg

sample=${SAMPLES[$LSB_JOBINDEX]}
reads=$DATA/$sample.fastq.gz
outdir=$RESULTS/common/$sample
bam=$outdir/$sample.bam
stats=$outdir/$sample.stats

# The redirect below fails if the per-sample directory is missing.
mkdir -p -- "$outdir"

if [[ ! -f $bam ]]; then
  # Write to a scratch name and rename only after the whole pipeline
  # succeeded; otherwise a failed run leaves a truncated BAM behind that the
  # existence check above would mistake for a finished alignment.
  partial=$bam.partial
  # NOTE(review): "samtools sort -o - <prefix>" looks like the legacy
  # (pre-1.0) samtools command line; the arguments are kept as they were.
  gsnap -D "$GMAPDB" -d mm9 --gunzip -s "$KNOWNSITES" -v snp128_strict_wholeChrs \
    -n1 -B5 -Q --nofails -t4 -A sam --pairexpect=250 --pairdev=150 \
    --read-group-id="$sample" --read-group-name="$sample" "$reads" \
    | samtools view -ShuF 4 - \
    | samtools sort -o - "$sample.temp" -m 9500000000 > "$partial"
  mv -- "$partial" "$bam"
  samtools index "$bam"
fi

# OUTPUT is handed to Picard as a base name, so a file called exactly $stats
# is not expected to appear; also look for the alignment summary report so
# that finished metrics are not recomputed on every run.
# NOTE(review): the ".alignment_summary_metrics" suffix is taken from the
# Picard documentation; confirm against the installed picard-tools version.
if [[ ! -f $stats && ! -f $stats.alignment_summary_metrics ]]; then
  java -Xmx8g -jar "$PICARD/CollectMultipleMetrics.jar" \
    INPUT="$bam" \
    REFERENCE_SEQUENCE="$REFERENCE" \
    ASSUME_SORTED=true \
    OUTPUT="$stats" \
    PROGRAM=CollectAlignmentSummaryMetrics \
    PROGRAM=CollectInsertSizeMetrics \
    PROGRAM=QualityScoreDistribution \
    PROGRAM=MeanQualityByCycle
fi
#! /usr/bin/env bash
#BSUB -J cuffdiff[1-18]
#BSUB -e cuffdiff.%J.%I.err
#BSUB -o cuffdiff.%J.%I.out
#BSUB -q normal
#BSUB -R "select[mem>20] rusage[mem=20] span[hosts=1]"
#BSUB -n 4
<<DOC
assemble transcripts and perform differential expression testing.
DOC
# LSF job array: each array element runs one pairwise comparison, selected
# from the parallel TESTS / TESTNAMES arrays (defined in experiment.cfg) by
# LSB_JOBINDEX.
#
# Required from experiment.cfg: TESTS, TESTNAMES, RESULTS, GTF, FASTA.
# Required from the environment: LSB_JOBINDEX.
#
# Outputs:
#   RESULTS/common/<testname>/                       cuffdiff output directory
#   RESULTS/common/<testname>/<testname>.gfold.diff  gfold diff result
#   <bam path minus .bam>.read_cnt                   gfold counts, next to each BAM
set -o nounset -o pipefail -o errexit -x
source experiment.cfg

# TODO: need to figure out a better way to handle test groups
pair=${TESTS[$LSB_JOBINDEX]}
testname=${TESTNAMES[$LSB_JOBINDEX]}
outdir=$RESULTS/common/$testname
# "A_vs_B" -> "A,B": the comma-separated condition labels passed to -L.
label=${testname/_vs_/,}

# Each TESTS entry is two BAM paths separated by whitespace.
read -r -a bams <<< "$pair"
if (( ${#bams[@]} != 2 )); then
  printf 'expected two BAM paths for test %s, got: %s\n' "$testname" "$pair" >&2
  exit 1
fi
sample1=${bams[0]}
sample2=${bams[1]}

if [[ ! -f $outdir/gene_exp.diff ]]; then
  # --min-reps-for-js-test 1 number of biological replicates requires for testing
  # NOTE(review): cuffdiff is usually documented as taking the annotation GTF
  # as its first positional argument; confirm that the installed version
  # accepts it via -g as written here.
  cuffdiff_opts=(
    -o "$outdir" -p4 -g "$GTF" -b "$FASTA" -u
    --min-reps-for-js-test 1 -L "$label"
  )
  cuffdiff "${cuffdiff_opts[@]}" "${bams[@]}"
fi

# gfold
# Guarded by its own output file so that a gfold failure after a successful
# cuffdiff run is retried on resubmission instead of being skipped for good.
outfile=$outdir/$testname.gfold.diff
if [[ ! -f $outfile ]]; then
  mkdir -p -- "$outdir"
  for bamfile in "${bams[@]}"; do
    # Strip only a trailing ".bam" so the counts land next to the BAM and can
    # never be written over the input file itself.
    samtools view "$bamfile" \
      | gfold count -ann "$GTF" -annf GTF -tag stdin -o "${bamfile%.bam}.read_cnt"
  done
  gfold diff -s1 "${sample1%.bam}" -s2 "${sample2%.bam}" -suf .read_cnt -o "$outfile"
fi
#!/usr/bin/env bash
# Shared settings for the alignment and differential-expression job scripts,
# which pull this file in with "source experiment.cfg".
set -o nounset -o xtrace

# --- project layout ---------------------------------------------------------
BASE="${HOME}/projects/xxxxxx"
BIN="${BASE}/bin"
DATA="${BASE}/data"
RESULTS="${BASE}/results"

# --- tools and reference data -----------------------------------------------
FASTQC="/vol1/home/brownj/opt/fastqc/fastqc"
GTF="${HOME}/ref/mm9/mm9.ncbi37.gtf"
FASTA="${HOME}/ref/mm9/mm9.fa"
GMAPDB="/vol1/home/gowank/Packages/GMAPDB"
KNOWNSITES="${HOME}/ref/mm9/mm9_refgene_knowngene.splicesites"
PICARD="${HOME}/opt/picard-tools-1.74"
REFERENCE="${HOME}/ref/mm9/mm9.fa"

# --- samples ----------------------------------------------------------------
# Element 0 is a filler entry so that the first real sample sits at index 1.
SAMPLES=(
  "idx0"
  "11_ARS_A1_CO5E6ACXX_3_111114_ATCACG"
  "12_BL6_CO5E6ACXX_3_111114_CGATGT"
  "13_MD4_CO5E6ACXX_3_111114_TTAGGC"
  "14_MD4xML5_CO5E6ACXX_3_111114_TGACCA"
  "1_MD4xML_ACAGTG"
  "2_MD4B220_CAGATC"
  "3_ARS_A1_ATGTCA"
  "4_BL6B220_CGATGT"
  "5_BL6Alpha_GTGAAA"
  "6_BL6Gamma_GTCCGC"
  "7_BL6-FO_CCGTCC"
  "8_BL6-ACT_TGACCA"
)

# --- per-sample BAM paths, named after the leading sample number -------------
one="${RESULTS}/common/1_MD4xML_ACAGTG/1_MD4xML_ACAGTG.bam"
two="${RESULTS}/common/2_MD4B220_CAGATC/2_MD4B220_CAGATC.bam"
three="${RESULTS}/common/3_ARS_A1_ATGTCA/3_ARS_A1_ATGTCA.bam"
four="${RESULTS}/common/4_BL6B220_CGATGT/4_BL6B220_CGATGT.bam"
five="${RESULTS}/common/5_BL6Alpha_GTGAAA/5_BL6Alpha_GTGAAA.bam"
six="${RESULTS}/common/6_BL6Gamma_GTCCGC/6_BL6Gamma_GTCCGC.bam"
seven="${RESULTS}/common/7_BL6-FO_CCGTCC/7_BL6-FO_CCGTCC.bam"
eight="${RESULTS}/common/8_BL6-ACT_TGACCA/8_BL6-ACT_TGACCA.bam"
eleven="${RESULTS}/common/11_ARS_A1_CO5E6ACXX_3_111114_ATCACG/11_ARS_A1_CO5E6ACXX_3_111114_ATCACG.bam"
twelve="${RESULTS}/common/12_BL6_CO5E6ACXX_3_111114_CGATGT/12_BL6_CO5E6ACXX_3_111114_CGATGT.bam"
thirteen="${RESULTS}/common/13_MD4_CO5E6ACXX_3_111114_TTAGGC/13_MD4_CO5E6ACXX_3_111114_TTAGGC.bam"
fourteen="${RESULTS}/common/14_MD4xML5_CO5E6ACXX_3_111114_TGACCA/14_MD4xML5_CO5E6ACXX_3_111114_TGACCA.bam"

# --- pairwise comparisons ---------------------------------------------------
# Each entry is "<first bam> <second bam>" separated by a single space.
# TESTS and TESTNAMES are parallel arrays: entry i of one belongs to entry i
# of the other, and element 0 is again a filler.
TESTS=(
  "idx0"
  "${eleven} ${twelve}"
  "${thirteen} ${fourteen}"
  "${one} ${two}"
  "${three} ${four}"
  "${four} ${five}"
  "${four} ${six}"
  "${four} ${seven}"
  "${four} ${eight}"
  "${five} ${eight}"
  "${six} ${eight}"
  "${two} ${three}"
  "${two} ${four}"
  "${two} ${five}"
  "${two} ${six}"
  "${two} ${seven}"
  "${two} ${eight}"
  "${thirteen} ${eleven}"
  "${thirteen} ${twelve}"
)

TESTNAMES=(
  "idx0"
  "11_vs_12"
  "13_vs_14"
  "1_vs_2"
  "3_vs_4"
  "4_vs_5"
  "4_vs_6"
  "4_vs_7"
  "4_vs_8"
  "5_vs_8"
  "6_vs_8"
  "2_vs_3"
  "2_vs_4"
  "2_vs_5"
  "2_vs_6"
  "2_vs_7"
  "2_vs_8"
  "13_vs_11"
  "13_vs_12"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment