Created
February 25, 2013 23:19
-
-
Save brwnj/5034269 to your computer and use it in GitHub Desktop.
old gsnap/cuffdiff
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env bash
#BSUB -J align[1-12]
#BSUB -e gsnap.%J.%I.err
#BSUB -o gsnap.%J.%I.out
#BSUB -q normal
#BSUB -R "select[mem>16] rusage[mem=16] span[hosts=1]"
#BSUB -n 4

: <<'DOC'
align rnaseq samples using gsnap against mm9

One job-array element handles one sample: LSB_JOBINDEX selects the entry of
SAMPLES (defined in experiment.cfg) to process.

Reads:   $DATA/<sample>.fastq.gz
Writes:  $RESULTS/common/<sample>/<sample>.bam  (mapped reads only, sorted)
         Picard metrics using $RESULTS/common/<sample>/<sample>.stats as OUTPUT

Each step is skipped when its output already exists, so a failed array
element can simply be resubmitted.
DOC

set -o nounset -o pipefail -o errexit -x

source experiment.cfg

sample=${SAMPLES[$LSB_JOBINDEX]}
reads=$DATA/$sample.fastq.gz
outdir=$RESULTS/common/$sample
bam=$outdir/$sample.bam
stats=$outdir/$sample.stats

# The shell redirection below cannot create missing directories.
mkdir -p "$outdir"

if [[ ! -f $bam ]]; then
    # Stream into a temporary name and rename only after the whole pipeline
    # succeeded. Writing straight to $bam would leave a truncated file behind
    # on failure, and the -f guard above would then treat it as finished.
    partial_bam=$bam.partial

    # samtools view -F 4 drops unmapped reads before sorting.
    # NOTE(review): `samtools sort -o - <prefix>` is the legacy (0.1.x)
    # calling convention; newer samtools releases use a different syntax.
    gsnap -D "$GMAPDB" -d mm9 --gunzip -s "$KNOWNSITES" -v snp128_strict_wholeChrs \
        -n1 -B5 -Q --nofails -t4 -A sam --pairexpect=250 --pairdev=150 \
        --read-group-id="$sample" --read-group-name="$sample" "$reads" \
        | samtools view -ShuF 4 - \
        | samtools sort -o - "$sample.temp" -m 9500000000 > "$partial_bam"

    mv -- "$partial_bam" "$bam"
fi

# Index separately so that a failed indexing run is retried on resubmission.
# NOTE(review): assumes samtools index writes <bam>.bai -- confirm for the
# installed samtools version.
if [[ ! -f $bam.bai ]]; then
    samtools index "$bam"
fi

# NOTE(review): CollectMultipleMetrics appears to treat OUTPUT as a base name
# and write <OUTPUT>.<metric suffix> files; if so, "$stats" itself is never
# created and this guard never skips. Confirm before changing the test.
if [[ ! -f $stats ]]; then
    java -Xmx8g -jar "$PICARD/CollectMultipleMetrics.jar" \
        INPUT="$bam" \
        REFERENCE_SEQUENCE="$REFERENCE" \
        ASSUME_SORTED=true \
        OUTPUT="$stats" \
        PROGRAM=CollectAlignmentSummaryMetrics \
        PROGRAM=CollectInsertSizeMetrics \
        PROGRAM=QualityScoreDistribution \
        PROGRAM=MeanQualityByCycle
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env bash
#BSUB -J cuffdiff[1-18]
#BSUB -e cuffdiff.%J.%I.err
#BSUB -o cuffdiff.%J.%I.out
#BSUB -q normal
#BSUB -R "select[mem>20] rusage[mem=20] span[hosts=1]"
#BSUB -n 4

: <<'DOC'
assemble transcripts and perform differential expression testing.

One job-array element handles one comparison: LSB_JOBINDEX selects the entry
of TESTS (two BAM paths separated by a space) and the matching entry of
TESTNAMES (e.g. 1_vs_2), both defined in experiment.cfg.

Writes:  $RESULTS/common/<testname>/                      cuffdiff output
         $RESULTS/common/<testname>/<testname>.gfold.diff gfold output
         <bam without .bam>.read_cnt                      gfold counts, beside each BAM
DOC

set -o nounset -o pipefail -o errexit -x

source experiment.cfg

# TODO: need to figure out a better way to handle test groups
pair=${TESTS[$LSB_JOBINDEX]}
testname=${TESTNAMES[$LSB_JOBINDEX]}
outdir=$RESULTS/common/$testname

# "1_vs_2" -> "1,2": the comma-separated condition labels passed to -L.
label=${testname/_vs_/,}

# Split the space-separated pair into its BAM paths.
read -r -a bams <<<"$pair"
sample1=${bams[0]}
sample2=${bams[1]}

if [[ ! -f $outdir/gene_exp.diff ]]; then
    # --min-reps-for-js-test 1 number of biological replicates required for testing
    cuffdiff_opts=(
        -o "$outdir"
        -p4
        -g "$GTF"
        -b "$FASTA"
        -u
        --min-reps-for-js-test 1
        -L "$label"
    )
    cuffdiff "${cuffdiff_opts[@]}" "${bams[@]}"
fi

# gfold
# Guarded by its own output rather than by cuffdiff's: otherwise a gfold
# failure after a successful cuffdiff run would never be retried.
outfile=$outdir/$testname.gfold.diff
if [[ ! -f $outfile ]]; then
    # Strip only a trailing ".bam"; gfold diff appends the -suf value itself.
    prefix1=${sample1%.bam}
    prefix2=${sample2%.bam}

    # NOTE(review): the .read_cnt files live next to the BAMs, and the same BAM
    # occurs in several TESTS entries, so concurrently running array elements
    # may write the same count file at the same time -- confirm this is safe.
    samtools view "$sample1" \
        | gfold count -ann "$GTF" -annf GTF -tag stdin -o "$prefix1.read_cnt"
    samtools view "$sample2" \
        | gfold count -ann "$GTF" -annf GTF -tag stdin -o "$prefix2.read_cnt"

    gfold diff -s1 "$prefix1" -s2 "$prefix2" -suf .read_cnt -o "$outfile"
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Shared experiment configuration, meant to be sourced by the job scripts.
#
# Defines the project directory layout, tool and reference locations, and three
# arrays that are indexed by the job-array index:
#   SAMPLES    sample names
#   TESTS      "<bam1> <bam2>" pairs to compare
#   TESTNAMES  short name of each comparison, parallel to TESTS
# Element 0 of every array is the placeholder "idx0" so that real entries
# start at index 1.
set -o nounset -x

BASE=$HOME/projects/xxxxxx
BIN=$BASE/bin
DATA=$BASE/data
RESULTS=$BASE/results

FASTQC=/vol1/home/brownj/opt/fastqc/fastqc

SAMPLES=(idx0
    11_ARS_A1_CO5E6ACXX_3_111114_ATCACG
    12_BL6_CO5E6ACXX_3_111114_CGATGT
    13_MD4_CO5E6ACXX_3_111114_TTAGGC
    14_MD4xML5_CO5E6ACXX_3_111114_TGACCA
    1_MD4xML_ACAGTG
    2_MD4B220_CAGATC
    3_ARS_A1_ATGTCA
    4_BL6B220_CGATGT
    5_BL6Alpha_GTGAAA
    6_BL6Gamma_GTCCGC
    7_BL6-FO_CCGTCC
    8_BL6-ACT_TGACCA)

GTF=$HOME/ref/mm9/mm9.ncbi37.gtf
FASTA=$HOME/ref/mm9/mm9.fa
GMAPDB=/vol1/home/gowank/Packages/GMAPDB
KNOWNSITES=$HOME/ref/mm9/mm9_refgene_knowngene.splicesites
PICARD=$HOME/opt/picard-tools-1.74
REFERENCE=$HOME/ref/mm9/mm9.fa

# Print the BAM path of sample $1: $RESULTS/common/<sample>/<sample>.bam.
# Building the path from a single name keeps the directory and file name in
# sync. The helper is removed again below so that sourcing this file defines
# variables only.
_bam_for() {
    printf '%s/common/%s/%s.bam' "$RESULTS" "$1" "$1"
}

one=$(_bam_for 1_MD4xML_ACAGTG)
two=$(_bam_for 2_MD4B220_CAGATC)
three=$(_bam_for 3_ARS_A1_ATGTCA)
four=$(_bam_for 4_BL6B220_CGATGT)
five=$(_bam_for 5_BL6Alpha_GTGAAA)
six=$(_bam_for 6_BL6Gamma_GTCCGC)
seven=$(_bam_for 7_BL6-FO_CCGTCC)
eight=$(_bam_for 8_BL6-ACT_TGACCA)
eleven=$(_bam_for 11_ARS_A1_CO5E6ACXX_3_111114_ATCACG)
twelve=$(_bam_for 12_BL6_CO5E6ACXX_3_111114_CGATGT)
thirteen=$(_bam_for 13_MD4_CO5E6ACXX_3_111114_TTAGGC)
fourteen=$(_bam_for 14_MD4xML5_CO5E6ACXX_3_111114_TGACCA)

unset -f _bam_for

# Keep TESTS and TESTNAMES in the same order: entry i of one describes entry i
# of the other.
TESTS=(idx0
    "$eleven $twelve"
    "$thirteen $fourteen"
    "$one $two"
    "$three $four"
    "$four $five"
    "$four $six"
    "$four $seven"
    "$four $eight"
    "$five $eight"
    "$six $eight"
    "$two $three"
    "$two $four"
    "$two $five"
    "$two $six"
    "$two $seven"
    "$two $eight"
    "$thirteen $eleven"
    "$thirteen $twelve")

TESTNAMES=(idx0
    11_vs_12
    13_vs_14
    1_vs_2
    3_vs_4
    4_vs_5
    4_vs_6
    4_vs_7
    4_vs_8
    5_vs_8
    6_vs_8
    2_vs_3
    2_vs_4
    2_vs_5
    2_vs_6
    2_vs_7
    2_vs_8
    13_vs_11
    13_vs_12)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment