Created
February 7, 2011 18:20
-
-
Save arq5x/814879 to your computer and use it in GitHub Desktop.
RS Exome analysis on the PBS environment
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
export BATCH1="1094PC0005 1094PC0009 1094PC0012 1094PC0013 " | |
export BATCH2="1094PC0016 1094PC0017 1094PC0018 1094PC0019 \ | |
1094PC0020 1094PC0021 1094PC0022 1094PC0023 1094PC0025 " | |
export BATCH3="1478PC0001B 1478PC0002 1478PC0003 1478PC0004 \ | |
1478PC0005 1478PC0006B 1478PC0007B 1478PC0008B \ | |
1478PC0009B 1478PC0010 1478PC0011 1478PC0012 \ | |
1478PC0013B 1478PC0014B 1478PC0015B 1478PC0016 \ | |
1478PC0017B 1478PC0018 1478PC0019 1478PC0020 \ | |
1478PC0021 1478PC0022B 1478PC0023B 1478PC0024B" | |
export BATCH4="1719PC0001 1719PC0002 1719PC0003 1719PC0004 \ | |
1719PC0005 1719PC0006 1719PC0007 1719PC0008 \ | |
1719PC0009 1719PC0010 1719PC0011 1719PC0012 \ | |
1719PC0013 1719PC0014 1719PC0015 1719PC0016 \ | |
1719PC0017 1719PC0018 1719PC0019 1719PC0020 \ | |
1719PC0021 1719PC0022" | |
export ALL="1094PC0005 1094PC0009 1094PC0012 1094PC0013 \ | |
1094PC0016 1094PC0017 1094PC0018 1094PC0019 \ | |
1094PC0020 1094PC0021 1094PC0022 1094PC0023 1094PC0025 \ | |
1478PC0001B 1478PC0002 1478PC0003 1478PC0004 \ | |
1478PC0005 1478PC0006B 1478PC0007B 1478PC0008B \ | |
1478PC0009B 1478PC0010 1478PC0011 1478PC0012 \ | |
1478PC0013B 1478PC0014B 1478PC0015B 1478PC0016 \ | |
1478PC0017B 1478PC0018 1478PC0019 1478PC0020 \ | |
1478PC0021 1478PC0022B 1478PC0023B 1478PC0024B \ | |
1478PC0025 \ | |
1719PC0001 1719PC0002 1719PC0003 1719PC0004 \ | |
1719PC0005 1719PC0006 1719PC0007 1719PC0008 \ | |
1719PC0009 1719PC0010 1719PC0011 1719PC0012 \ | |
1719PC0013 1719PC0014 1719PC0015 1719PC0016 \ | |
1719PC0017 1719PC0018 1719PC0019 1719PC0020 \ | |
1719PC0021 1719PC0022" | |
export LAST="1478PC0025" | |
############################################################ | |
# Convert latest batch. | |
############################################################ | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s1_1_GSL48index_2_SL16528.fastq C0UPMACXX_s2_1_GSL48index_2_SL16528.fastq > 1719-PC-0001.1.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s1_2_GSL48index_2_SL16528.fastq C0UPMACXX_s2_2_GSL48index_2_SL16528.fastq > 1719-PC-0001.2.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s1_1_GSL48index_3_SL16529.fastq C0UPMACXX_s2_1_GSL48index_3_SL16529.fastq > 1719-PC-0002.1.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s1_2_GSL48index_3_SL16529.fastq C0UPMACXX_s2_2_GSL48index_3_SL16529.fastq > 1719-PC-0002.2.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s1_1_GSL48index_4_SL16530.fastq C0UPMACXX_s2_1_GSL48index_4_SL16530.fastq > 1719-PC-0003.1.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s1_2_GSL48index_4_SL16530.fastq C0UPMACXX_s2_2_GSL48index_4_SL16530.fastq > 1719-PC-0003.2.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s1_1_GSL48index_5_SL16531.fastq C0UPMACXX_s2_1_GSL48index_5_SL16531.fastq > 1719-PC-0004.1.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s1_2_GSL48index_5_SL16531.fastq C0UPMACXX_s2_2_GSL48index_5_SL16531.fastq > 1719-PC-0004.2.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s1_1_GSL48index_6_SL16532.fastq C0UPMACXX_s2_1_GSL48index_6_SL16532.fastq > 1719-PC-0005.1.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s1_2_GSL48index_6_SL16532.fastq C0UPMACXX_s2_2_GSL48index_6_SL16532.fastq > 1719-PC-0005.2.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s1_1_GSL48index_7_SL16533.fastq C0UPMACXX_s2_1_GSL48index_7_SL16533.fastq > 1719-PC-0006.1.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s1_2_GSL48index_7_SL16533.fastq C0UPMACXX_s2_2_GSL48index_7_SL16533.fastq > 1719-PC-0006.2.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s1_1_GSL48index_8_SL16534.fastq C0UPMACXX_s2_1_GSL48index_8_SL16534.fastq > 1719-PC-0007.1.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s1_2_GSL48index_8_SL16534.fastq C0UPMACXX_s2_2_GSL48index_8_SL16534.fastq > 1719-PC-0007.2.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s1_1_GSL48index_9_SL16535.fastq C0UPMACXX_s2_1_GSL48index_9_SL16535.fastq > 1719-PC-0008.1.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s1_2_GSL48index_9_SL16535.fastq C0UPMACXX_s2_2_GSL48index_9_SL16535.fastq > 1719-PC-0008.2.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s3_1_GSL48index_10_SL16536.fastq C0UPMACXX_s4_1_GSL48index_10_SL16536.fastq > 1719-PC-0009.1.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s3_2_GSL48index_10_SL16536.fastq C0UPMACXX_s4_2_GSL48index_10_SL16536.fastq > 1719-PC-0009.2.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s3_1_GSL48index_11_SL16537.fastq C0UPMACXX_s4_1_GSL48index_11_SL16537.fastq > 1719-PC-0010.1.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s3_2_GSL48index_11_SL16537.fastq C0UPMACXX_s4_2_GSL48index_11_SL16537.fastq > 1719-PC-0010.2.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s3_1_GSL48index_12_SL16538.fastq C0UPMACXX_s4_1_GSL48index_12_SL16538.fastq > 1719-PC-0011.1.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s3_2_GSL48index_12_SL16538.fastq C0UPMACXX_s4_2_GSL48index_12_SL16538.fastq > 1719-PC-0011.2.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s3_1_GSL48index_1_SL16539.fastq C0UPMACXX_s4_1_GSL48index_1_SL16539.fastq > 1719-PC-0012.1.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s3_2_GSL48index_1_SL16539.fastq C0UPMACXX_s4_2_GSL48index_1_SL16539.fastq > 1719-PC-0012.2.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s3_1_GSL48index_2_SL16540.fastq C0UPMACXX_s4_1_GSL48index_2_SL16540.fastq > 1719-PC-0013.1.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s3_2_GSL48index_2_SL16540.fastq C0UPMACXX_s4_2_GSL48index_2_SL16540.fastq > 1719-PC-0013.2.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s3_1_GSL48index_3_SL16541.fastq C0UPMACXX_s4_1_GSL48index_3_SL16541.fastq > 1719-PC-0014.1.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s3_2_GSL48index_3_SL16541.fastq C0UPMACXX_s4_2_GSL48index_3_SL16541.fastq > 1719-PC-0014.2.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s3_1_GSL48index_4_SL16542.fastq C0UPMACXX_s4_1_GSL48index_4_SL16542.fastq > 1719-PC-0015.1.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s3_2_GSL48index_4_SL16542.fastq C0UPMACXX_s4_2_GSL48index_4_SL16542.fastq > 1719-PC-0015.2.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s3_1_GSL48index_5_SL16543.fastq C0UPMACXX_s4_1_GSL48index_5_SL16543.fastq > 1719-PC-0016.1.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s3_2_GSL48index_5_SL16543.fastq C0UPMACXX_s4_2_GSL48index_5_SL16543.fastq > 1719-PC-0016.2.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s5_1_GSL48index_6_SL16544.fastq C0UPMACXX_s6_1_GSL48index_6_SL16544.fastq > 1719-PC-0017.1.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s5_2_GSL48index_6_SL16544.fastq C0UPMACXX_s6_2_GSL48index_6_SL16544.fastq > 1719-PC-0017.2.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s5_1_GSL48index_7_SL16545.fastq C0UPMACXX_s6_1_GSL48index_7_SL16545.fastq > 1719-PC-0018.1.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s5_2_GSL48index_7_SL16545.fastq C0UPMACXX_s6_2_GSL48index_7_SL16545.fastq > 1719-PC-0018.2.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s5_1_GSL48index_8_SL16546.fastq C0UPMACXX_s6_1_GSL48index_8_SL16546.fastq > 1719-PC-0019.1.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s5_2_GSL48index_8_SL16546.fastq C0UPMACXX_s6_2_GSL48index_8_SL16546.fastq > 1719-PC-0019.2.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s5_1_GSL48index_9_SL16547.fastq C0UPMACXX_s6_1_GSL48index_9_SL16547.fastq > 1719-PC-0020.1.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s5_2_GSL48index_9_SL16547.fastq C0UPMACXX_s6_2_GSL48index_9_SL16547.fastq > 1719-PC-0020.2.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s5_1_GSL48index_10_SL16548.fastq C0UPMACXX_s6_1_GSL48index_10_SL16548.fastq > 1719-PC-0021.1.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s5_2_GSL48index_10_SL16548.fastq C0UPMACXX_s6_2_GSL48index_10_SL16548.fastq > 1719-PC-0021.2.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s5_1_GSL48index_11_SL16549.fastq C0UPMACXX_s6_1_GSL48index_11_SL16549.fastq > 1719-PC-0022.1.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/origFromHudsonAlpha/2012-Sept; cat C0UPMACXX_s5_2_GSL48index_11_SL16549.fastq C0UPMACXX_s6_2_GSL48index_11_SL16549.fastq > 1719-PC-0022.2.fq" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=1000m:ncpus=1 | |
############################################################ | |
# Quick hack to ensure no sample mix-ups. | |
############################################################ | |
export RSHOME=/home/arq5x/cphg-home/projects/rs-exome | |
export STEPNAME=rsex-proper | |
for sample in `echo $BATCH3` | |
do | |
# -m ea sends an email when job (e)nds or (a)borts | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=4000m:ncpus=2 -N $STEPNAME -m bea -M [email protected]" | |
echo "cd $RSHOME; samtools view -q 20 -f 2 -u origFromHudsonAlpha/$sample.bam \ | |
| intersectBed -abam stdin -b bed/sureselect.liftedTohg19.slop500.numchroms.bed \ | |
> bam/$sample.conc.on.bam" | $QSUB | |
done | |
###################################### | |
# Sort the original BAM files by name: | |
###################################### | |
export RSHOME=/home/arq5x/cphg-home/projects/rs-exome | |
export STEPNAME=rs-ex-nmsrt | |
for sample in `echo $BATCH4` | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=6000m:ncpus=1 -N $STEPNAME -m bea -M [email protected]" | |
echo "cd $RSHOME; samtools sort -n -m 2000000000 origFromHudsonAlpha/$sample.bam bam/$sample.namesorted" | $QSUB | |
done | |
############################################################ | |
# Create new FASTQ files for every sample | |
############################################################ | |
export RSHOME=/home/arq5x/cphg-home/projects/rs-exome | |
export STEPNAME=rs-ex-b2fq | |
for sample in `echo $BATCH4`; | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=1 -N $STEPNAME -m bea -M [email protected]"; | |
echo "cd $RSHOME; java -Xmx2g -jar /home/arq5x/cphg-home/bin/SamToFastq.jar \ | |
INPUT=bam/$sample.namesorted.bam \ | |
VALIDATION_STRINGENCY=LENIENT \ | |
F=fastq/$sample.1.fq \ | |
F2=fastq/$sample.2.fq" | $QSUB; | |
done | |
############################################################ | |
# Gzip the FASTQs | |
############################################################ | |
export RSHOME=/home/arq5x/cphg-home/projects/rs-exome | |
export STEPNAME=rsex-fqzip | |
for sample in `echo $BATCH4`; | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=1 -N $STEPNAME -m bea -M [email protected]"; | |
echo "cd $RSHOME; gzip fastq/$sample.1.fq" | $QSUB; | |
echo "cd $RSHOME; gzip fastq/$sample.2.fq" | $QSUB; | |
done | |
########################################## | |
# Delete the name-sorted BAMs | |
########################################## | |
export RSHOME=/home/arq5x/cphg-home/projects/rs-exome | |
for sample in `echo $BATCH4` | |
do | |
cd $RSHOME; rm bam/$sample.namesorted.bam | |
done | |
########################################## | |
# Align the raw FASTQ with BWA | |
########################################## | |
export RSHOME=/home/arq5x/cphg-home/projects/rs-exome | |
export STEPNAME=rs-ex-bwa-aln | |
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa | |
for sample in `echo $BATCH4` | |
do | |
# -m ea sends an email when job (e)nds or (a)borts | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=4000m:ncpus=8 -N $STEPNAME -m bea -M [email protected]" | |
echo "cd $RSHOME; bwa aln -e 1 -t 8 $GENOME fastq/$sample.1.fq > bam/$sample.1.sai" | $QSUB | |
echo "cd $RSHOME; bwa aln -e 1 -t 8 $GENOME fastq/$sample.2.fq > bam/$sample.2.sai" | $QSUB | |
done | |
############################################################ | |
# Pair the alignments. | |
# Keep proper, on-target (i.e. +/- 500 bp of a probe) pairs. | |
# Require mapping quality >= 20 | |
# Add the sample ID as a RG | |
############################################################ | |
export RSHOME=/home/arq5x/cphg-home/projects/rs-exome | |
export STEPNAME=rsex-bwa-pair | |
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa | |
for sample in `echo $BATCH4` | |
do | |
# -m ea sends an email when job (e)nds or (a)borts | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=3 -N $STEPNAME -m bea -M [email protected]" | |
echo "cd $RSHOME; bwa sampe -r '@RG\tID:$sample\tSM:$sample' $GENOME bam/$sample.1.sai bam/$sample.2.sai fastq/$sample.1.fq fastq/$sample.2.fq \ | |
| samtools view -q 20 -f 2 -Su - \ | |
| intersectBed -abam stdin -b bed/sureselect.liftedTohg19.slop500.bed \ | |
> bam/$sample.conc.on.bam" | $QSUB | |
done | |
###################################### | |
# Data quality assessment - flagstat: | |
###################################### | |
export RSHOME=/home/arq5x/cphg-home/projects/rs-exome | |
export STEPNAME=rsex-flagstat | |
for sample in `echo $ALL` | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=2000m:ncpus=1 -N $STEPNAME -m bea -M [email protected]" | |
echo "cd $RSHOME; samtools flagstat origFromHudsonAlpha/$sample.bam > origFromHudsonAlpha/$sample.bam.flagstat" | $QSUB | |
done | |
########################################################## | |
# Data quality assessment - flagstat for conc, on-target: | |
########################################################## | |
export RSHOME=/home/arq5x/cphg-home/projects/rs-exome | |
export STEPNAME=rsex-conc-fs | |
for sample in `echo $ALL` | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=2000m:ncpus=1 -N $STEPNAME -m bea -M [email protected]" | |
echo "cd $RSHOME; samtools flagstat bam/$sample.conc.on.bam > bam/$sample.conc.on.bam.flagstat" | $QSUB | |
done | |
###################################### | |
# Data quality assessment - coverage: | |
###################################### | |
export RSHOME=/home/arq5x/cphg-home/projects/rs-exome | |
export STEPNAME=rs-ex-cov | |
export SAMPLES="1094PC0005 1094PC0009 1094PC0012 1094PC0013 1094PC0016 1094PC0017 1094PC0018 1094PC0019 1094PC0020 1094PC0021 1094PC0022 1094PC0023 1094PC0025" | |
for sample in `echo $SAMPLES` | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=1 -N $STEPNAME -m bea -M [email protected]" | |
echo "cd $RSHOME; coverageBed -abam bam/$sample.conc.on.bam -b bed/sureselect.liftedTohg19.slop500.bed > bam/$sample.conc.on.bam.cov" | $QSUB | |
done | |
###################################### | |
# Data quality assessment - err. rate: | |
###################################### | |
export RSHOME=/home/arq5x/cphg-home/projects/rs-exome | |
export STEPNAME=rs-ex-err | |
export SAMPLES="1094PC0005 1094PC0009 1094PC0012 1094PC0013 1094PC0016 1094PC0017 1094PC0018 1094PC0019 1094PC0020 1094PC0021 1094PC0022 1094PC0023 1094PC0025" | |
for sample in `echo $SAMPLES` | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=2000m:ncpus=1 -N $STEPNAME -m bea -M [email protected]" | |
echo "cd $RSHOME; bamToBed -i bam/$sample.conc.on.bam -tag NM | head -1000000 | cut -f 5 | stats > bam/$sample.conc.on.bam.err" | $QSUB | |
done | |
############################################################ | |
# Sort the on-target concordants by genome position. | |
############################################################ | |
export RSHOME=/home/arq5x/cphg-home/projects/rs-exome | |
export STEPNAME=rs-ex-possrt | |
for sample in `echo $BATCH4` | |
do | |
# -m ea sends an email when job (e)nds or (a)borts | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=12000m:ncpus=1 -N $STEPNAME -m bea -M [email protected]" | |
echo "cd $RSHOME; samtools sort -m 4000000000 bam/$sample.conc.on.bam bam/$sample.conc.on.pos" | $QSUB | |
done | |
############################################################ | |
# Index the position-sorted BAM files. | |
############################################################ | |
export RSHOME=/home/arq5x/cphg-home/projects/rs-exome | |
export STEPNAME=rsex-index | |
for sample in `echo $BATCH4` | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=2000m:ncpus=1 -N $STEPNAME -m bea -M [email protected]" | |
echo "cd $RSHOME; samtools index bam/$sample.conc.on.pos.bam" | $QSUB | |
done | |
############################################################ | |
# Use mpileup to get a snese of any sample mix-ups | |
############################################################ | |
export RSHOME=/home/arq5x/cphg-home/projects/rs-exome | |
export STEPNAME=rsex-mpile | |
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa | |
export SAMPLES="bam/1094PC0005.conc.on.pos.bam \ | |
bam/1094PC0009.conc.on.pos.bam \ | |
bam/1094PC0012.conc.on.pos.bam \ | |
bam/1094PC0013.conc.on.pos.bam \ | |
bam/1094PC0016.conc.on.pos.bam \ | |
bam/1094PC0017.conc.on.pos.bam \ | |
bam/1094PC0018.conc.on.pos.bam \ | |
bam/1094PC0019.conc.on.pos.bam \ | |
bam/1094PC0020.conc.on.pos.bam \ | |
bam/1094PC0021.conc.on.pos.bam \ | |
bam/1094PC0022.conc.on.pos.bam \ | |
bam/1094PC0023.conc.on.pos.bam \ | |
bam/1094PC0025.conc.on.pos.bam \ | |
bam/1478PC0017B.conc.on.bam \ | |
bam/1478PC0012.conc.on.bam \ | |
bam/1478PC0024B.conc.on.bam \ | |
bam/1478PC0004.conc.on.bam \ | |
bam/1478PC0003.conc.on.bam \ | |
bam/1478PC0018.conc.on.bam \ | |
bam/1478PC0009B.conc.on.bam \ | |
bam/1478PC0010.conc.on.bam \ | |
bam/1478PC0013B.conc.on.bam \ | |
bam/1478PC0011.conc.on.bam \ | |
bam/1478PC0019.conc.on.bam \ | |
bam/1478PC0007B.conc.on.bam \ | |
bam/1478PC0022B.conc.on.bam \ | |
bam/1478PC0015B.conc.on.bam \ | |
bam/1478PC0021.conc.on.bam \ | |
bam/1478PC0014B.conc.on.bam \ | |
bam/1478PC0005.conc.on.bam \ | |
bam/1478PC0001B.conc.on.bam \ | |
bam/1478PC0006B.conc.on.bam \ | |
bam/1478PC0008B.conc.on.bam \ | |
bam/1478PC0016.conc.on.bam \ | |
bam/1478PC0002.conc.on.bam \ | |
bam/1478PC0023B.conc.on.bam \ | |
bam/1478PC0020.conc.on.bam" | |
# -m ea sends an email when job (e)nds or (a)borts | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=4000m:ncpus=1 -N $STEPNAME -m bea -M [email protected]" | |
echo "cd $RSHOME; samtools mpileup -uf $GENOME $SAMPLES | bcftools view -bvcg - > varCalling/2012-Jan/kinship-quick/var.mpileup.raw.bcf" | $QSUB | |
############################################################ | |
# Mark duplicates | |
############################################################ | |
export RSHOME=/home/arq5x/cphg-home/projects/rs-exome | |
export STEPNAME=rs-ex-dupe | |
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa | |
export BIN=/home/arq5x/cphg-home/shared/bin/picard-tools-1.60 | |
for sample in `echo $BATCH4`; | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=2 -N $STEPNAME -m bea -M [email protected]"; | |
echo "cd $RSHOME; java -Xmx2g -jar $BIN/MarkDuplicates.jar \ | |
INPUT=bam/$sample.conc.on.pos.bam \ | |
OUTPUT=bam/$sample.conc.on.pos.dedup.bam \ | |
TMP_DIR=$RSHOME/bam \ | |
VALIDATION_STRINGENCY=LENIENT \ | |
ASSUME_SORTED=true \ | |
METRICS_FILE=bam/$sample.markdup_metrics" | $QSUB; | |
done | |
############################################################ | |
# Index the position-sorted, deduped BAM files. | |
############################################################ | |
export RSHOME=/home/arq5x/cphg-home/projects/rs-exome | |
export STEPNAME=rsex-index | |
for sample in `echo $BATCH4` | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=2000m:ncpus=1 -N $STEPNAME -m bea -M [email protected]" | |
echo "cd $RSHOME; samtools index bam/$sample.conc.on.pos.dedup.bam" | $QSUB | |
done | |
############################################################ | |
# Merge BAM files | |
############################################################ | |
export RSHOME=/home/arq5x/cphg-home/projects/rs-exome | |
export STEPNAME=rs-ex-merge | |
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa | |
export BIN=/home/arq5x/cphg-home/shared/bin/picard-tools-1.60 | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=4 -N $STEPNAME -m bea -M [email protected]"; | |
echo "cd $RSHOME; java -Xmx2g -jar $BIN/MergeSamFiles.jar \ | |
O=bam/all.conc.on.pos.dedup.bam \ | |
I=bam/1094PC0005.conc.on.pos.dedup.bam \ | |
I=bam/1094PC0009.conc.on.pos.dedup.bam \ | |
I=bam/1094PC0012.conc.on.pos.dedup.bam \ | |
I=bam/1094PC0013.conc.on.pos.dedup.bam \ | |
I=bam/1094PC0016.conc.on.pos.dedup.bam \ | |
I=bam/1094PC0017.conc.on.pos.dedup.bam \ | |
I=bam/1094PC0018.conc.on.pos.dedup.bam \ | |
I=bam/1094PC0019.conc.on.pos.dedup.bam \ | |
I=bam/1094PC0020.conc.on.pos.dedup.bam \ | |
I=bam/1094PC0021.conc.on.pos.dedup.bam \ | |
I=bam/1094PC0022.conc.on.pos.dedup.bam \ | |
I=bam/1094PC0023.conc.on.pos.dedup.bam \ | |
I=bam/1094PC0025.conc.on.pos.dedup.bam \ | |
I=bam/1478PC0001B.conc.on.pos.dedup.bam \ | |
I=bam/1478PC0002.conc.on.pos.dedup.bam \ | |
I=bam/1478PC0003.conc.on.pos.dedup.bam \ | |
I=bam/1478PC0004.conc.on.pos.dedup.bam \ | |
I=bam/1478PC0005.conc.on.pos.dedup.bam \ | |
I=bam/1478PC0006B.conc.on.pos.dedup.bam \ | |
I=bam/1478PC0007B.conc.on.pos.dedup.bam \ | |
I=bam/1478PC0008B.conc.on.pos.dedup.bam \ | |
I=bam/1478PC0009B.conc.on.pos.dedup.bam \ | |
I=bam/1478PC0010.conc.on.pos.dedup.bam \ | |
I=bam/1478PC0011.conc.on.pos.dedup.bam \ | |
I=bam/1478PC0012.conc.on.pos.dedup.bam \ | |
I=bam/1478PC0013B.conc.on.pos.dedup.bam \ | |
I=bam/1478PC0014B.conc.on.pos.dedup.bam \ | |
I=bam/1478PC0015B.conc.on.pos.dedup.bam \ | |
I=bam/1478PC0016.conc.on.pos.dedup.bam \ | |
I=bam/1478PC0017B.conc.on.pos.dedup.bam \ | |
I=bam/1478PC0018.conc.on.pos.dedup.bam \ | |
I=bam/1478PC0019.conc.on.pos.dedup.bam \ | |
I=bam/1478PC0020.conc.on.pos.dedup.bam \ | |
I=bam/1478PC0021.conc.on.pos.dedup.bam \ | |
I=bam/1478PC0022B.conc.on.pos.dedup.bam \ | |
I=bam/1478PC0023B.conc.on.pos.dedup.bam \ | |
I=bam/1478PC0024B.conc.on.pos.dedup.bam \ | |
I=bam/1478PC0025.conc.on.pos.dedup.bam \ | |
I=bam/1719PC0001.conc.on.pos.dedup.bam \ | |
I=bam/1719PC0002.conc.on.pos.dedup.bam \ | |
I=bam/1719PC0003.conc.on.pos.dedup.bam \ | |
I=bam/1719PC0004.conc.on.pos.dedup.bam \ | |
I=bam/1719PC0005.conc.on.pos.dedup.bam \ | |
I=bam/1719PC0006.conc.on.pos.dedup.bam \ | |
I=bam/1719PC0007.conc.on.pos.dedup.bam \ | |
I=bam/1719PC0008.conc.on.pos.dedup.bam \ | |
I=bam/1719PC0009.conc.on.pos.dedup.bam \ | |
I=bam/1719PC0010.conc.on.pos.dedup.bam \ | |
I=bam/1719PC0011.conc.on.pos.dedup.bam \ | |
I=bam/1719PC0012.conc.on.pos.dedup.bam \ | |
I=bam/1719PC0013.conc.on.pos.dedup.bam \ | |
I=bam/1719PC0014.conc.on.pos.dedup.bam \ | |
I=bam/1719PC0015.conc.on.pos.dedup.bam \ | |
I=bam/1719PC0016.conc.on.pos.dedup.bam \ | |
I=bam/1719PC0017.conc.on.pos.dedup.bam \ | |
I=bam/1719PC0018.conc.on.pos.dedup.bam \ | |
I=bam/1719PC0019.conc.on.pos.dedup.bam \ | |
I=bam/1719PC0020.conc.on.pos.dedup.bam \ | |
I=bam/1719PC0021.conc.on.pos.dedup.bam \ | |
I=bam/1719PC0022.conc.on.pos.dedup.bam \ | |
TMP_DIR=$RSHOME/bam \ | |
USE_THREADING=true" | $QSUB; | |
############################################################ | |
# Identify realignment targets. | |
############################################################ | |
export RSHOME=/home/arq5x/cphg-home/projects/rs-exome | |
export STEPNAME=rsex-realtgts | |
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa | |
export BIN=/home/arq5x/cphg-home/shared/bin/GenomeAnalysisTK-1.4-9-g1f1233b | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=3 -N $STEPNAME -m bea -M [email protected]"; | |
echo "cd $RSHOME; java -Xmx2g -jar $BIN/GenomeAnalysisTK.jar \ | |
-T RealignerTargetCreator \ | |
-R $GENOME \ | |
-I bam/all.conc.on.pos.dedup.bam \ | |
-o bam/all.conc.on.pos.dedup.bam.intervals" | $QSUB; | |
############################################################ | |
# Use GATK to realign the BAMs at the suspicious targets. | |
############################################################ | |
export RSHOME=/home/arq5x/cphg-home/projects/rs-exome | |
export STEPNAME=rsex-realtgts | |
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa | |
export BIN=/home/arq5x/cphg-home/shared/bin/GenomeAnalysisTK-1.4-9-g1f1233b | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=16000m:ncpus=4 -N $STEPNAME -m bea -M [email protected]"; | |
echo "cd $RSHOME; java -Xmx2g -Djava.io.tmpdir=$RSHOME/bam -jar $BIN/GenomeAnalysisTK.jar \ | |
-T IndelRealigner \ | |
--num_threads 1 \ | |
-R $GENOME \ | |
-targetIntervals bam/all.conc.on.pos.dedup.bam.intervals \ | |
-I bam/all.conc.on.pos.dedup.bam \ | |
-o bam/all.conc.on.pos.dedup.realigned.bam" | $QSUB; | |
############################################################ | |
# Index the merged, deduped BAM file as well as the | |
# merged, deduped, realigned file. | |
############################################################ | |
export RSHOME=/home/arq5x/cphg-home/projects/rs-exome | |
export STEPNAME=rsex-index | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=4000m:ncpus=4 -N $STEPNAME -m bea -M [email protected]" | |
echo "cd $RSHOME; samtools index bam/all.conc.on.pos.dedup.bam" | $QSUB | |
echo "cd $RSHOME; samtools index bam/all.conc.on.pos.dedup.realigned.bam" | $QSUB | |
############################################################ | |
# Call SNPs and INDELs with GATK using BAQ. | |
############################################################ | |
export RSHOME=/home/arq5x/cphg-home/projects/rs-exome | |
export STEPNAME=rsex-varbaq | |
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa | |
export BIN=/home/arq5x/cphg-home/shared/bin/GenomeAnalysisTK-1.4-9-g1f1233b | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=16000m:ncpus=12 -N $STEPNAME -m bea -M [email protected]"; | |
echo "cd $RSHOME; java -Xmx2g -jar $BIN/GenomeAnalysisTK.jar \ | |
-T UnifiedGenotyper \ | |
-glm BOTH \ | |
--num_threads 10 \ | |
-baq CALCULATE_AS_NECESSARY \ | |
-R $GENOME \ | |
-I bam/all.conc.on.pos.dedup.realigned.bam \ | |
-o varCalling/2012-Nov-09/all.raw.baq.vcf" | $QSUB; | |
############################################################ | |
# Call SNPs and INDELs with GATK W/O using BAQ. | |
############################################################ | |
export RSHOME=/home/arq5x/cphg-home/projects/rs-exome | |
export STEPNAME=rsex-varbaq | |
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa | |
export BIN=/home/arq5x/cphg-home/shared/bin/GenomeAnalysisTK-1.4-9-g1f1233b | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=16000m:ncpus=12 -N $STEPNAME -m bea -M [email protected]"; | |
echo "cd $RSHOME; java -Xmx2g -jar $BIN/GenomeAnalysisTK.jar \ | |
-T UnifiedGenotyper \ | |
-glm BOTH \ | |
--num_threads 10 \ | |
-baq OFF \ | |
-R $GENOME \ | |
-I bam/all.conc.on.pos.dedup.realigned.bam \ | |
-o varCalling/2012-Nov-09/all.raw.nobaq.vcf" | $QSUB; | |
############################################################ | |
# Call INDELs with GATK. | |
############################################################ | |
export RSHOME=/home/arq5x/cphg-home/projects/rs-exome | |
export STEPNAME=rsex-callsnps | |
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa | |
export SAMPLES="1094PC0005 1094PC0009 1094PC0012 1094PC0013 1094PC0016 1094PC0017 1094PC0018 1094PC0019 1094PC0020 1094PC0021 1094PC0022 1094PC0023 1094PC0025" | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=4 -N $STEPNAME -m bea -M [email protected]"; | |
for sample in `echo $SAMPLES`; | |
do | |
echo "cd $RSHOME; java -Xmx4g -jar /home/arq5x/cphg-home/bin/GenomeAnalysisTK.jar \ | |
-T IndelGenotyperV2 \ | |
-l INFO \ | |
-R $GENOME \ | |
-I bam/$sample.conc.on.pos.realigned.dedup.baq.bam \ | |
-o varCalling/$sample.indel.vcf" | $QSUB; | |
done | |
######################################################################## | |
# Combine the INDEL call from each sample into a single file with GATK. | |
######################################################################## | |
export RSHOME=/home/arq5x/cphg-home/projects/rs-exome | |
export STEPNAME=rsex-callsnps | |
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=16000m:ncpus=2 -N $STEPNAME -m bea -M [email protected]"; | |
echo "cd $RSHOME; java -Xmx10g -jar /home/arq5x/cphg-home/bin/GenomeAnalysisTK.jar \ | |
-T CombineVariants \ | |
-R $GENOME \ | |
-o varCalling/indels.raw.vcf \ | |
-variantMergeOptions UNION \ | |
-genotypeMergeOptions UNIQUIFY \ | |
-B:1094PC0005,VCF varCalling/1094PC0005.indel.vcf \ | |
-B:1094PC0009,VCF varCalling/1094PC0009.indel.vcf \ | |
-B:1094PC0012,VCF varCalling/1094PC0012.indel.vcf \ | |
-B:1094PC0013,VCF varCalling/1094PC0013.indel.vcf \ | |
-B:1094PC0016,VCF varCalling/1094PC0016.indel.vcf \ | |
-B:1094PC0017,VCF varCalling/1094PC0017.indel.vcf \ | |
-B:1094PC0018,VCF varCalling/1094PC0018.indel.vcf \ | |
-B:1094PC0019,VCF varCalling/1094PC0019.indel.vcf \ | |
-B:1094PC0020,VCF varCalling/1094PC0020.indel.vcf \ | |
-B:1094PC0021,VCF varCalling/1094PC0021.indel.vcf \ | |
-B:1094PC0022,VCF varCalling/1094PC0022.indel.vcf \ | |
-B:1094PC0023,VCF varCalling/1094PC0023.indel.vcf \ | |
-B:1094PC0025,VCF varCalling/1094PC0025.indel.vcf " | $QSUB; | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/varCalling/; annotateVcf.py -i snps.raw.vcf -p /home/arq5x/cphg-home/bin/annovar/ -s /home/arq5x/cphg-home/shared/annotations/hg19/dbSNP/dbsnp.131.hg19.bed -a /home/arq5x/cphg-home/shared/data/exomes/1000G-210Exomes-20110214/ALL.broad_exome.20110124.both.sites.vcf -o /home/arq5x/cphg-home/projects/rs-exome/varCalling/" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=12000m:ncpus=1 -N snp -m bea -M [email protected] | |
echo "cd /home/arq5x/cphg-home/projects/rs-exome/varCalling/; annotateVcf.py -i indels.raw.vcf -p /home/arq5x/cphg-home/bin/annovar/ -s /home/arq5x/cphg-home/shared/annotations/hg19/dbSNP/dbsnp.131.hg19.bed -a /home/arq5x/cphg-home/shared/data/exomes/1000G-210Exomes-20110214/ALL.broad_exome.20110124.both.sites.vcf -o /home/arq5x/cphg-home/projects/rs-exome/varCalling/" | qsub -q cphg -W group_list=CPHG -V -l select=1:mem=12000m:ncpus=1 -N indel -m bea -M [email protected] | |
# combine the output of snps and indel annotations into a single file | |
cat snps.raw.vcf.1.annovar_anno.2.dbsnp_anno.3.vcf_anno indels.raw.vcf.1.annovar_anno.2.dbsnp_anno.3.vcf_anno > all.vcf.anno | |
# move the file to my laptop | |
scp elder.itc.virginia.edu:~/cphg-home/projects/rs-exome/varCalling/all.vcf.anno all.vcf.anno.txt | |
# extract some add'l information from the VCF INFO field | |
perl -lane 'if (/DP=(\d+)/) {print $1} else {print "-1"}' all.vcf.anno.txt > depths.txt | |
perl -lane 'if (/HRun=(\d+)/) {print $1} else {print "-1"}' all.vcf.anno.txt > hruns.txt | |
perl -lane 'if (/AF=(\d.\d+)/) {print $1} else {print "-1"}' all.vcf.anno.txt > dafs.txt | |
perl -lane 'if (/MQ=(\d+.\d+)/) {print $1} else {print "-1"}' all.vcf.anno.txt > mapqs.txt | |
# add the extra fields. | |
paste depths.txt mapqs.txt dafs.txt hruns.txt all.vcf.anno.txt > all.vcf.anno.info.txt | |
# require: at least 100 reads | |
# mapq>=30 | |
# no intronic, intergenic, upstream, downstream or synonymous | |
awk '$1>=100 && $2 >30' all.vcf.anno.info.txt | \ | |
grep -v intronic | \ | |
grep -v intergenic | \ | |
grep -v upstream | \ | |
grep -v downstream > \ | |
all.vcf.anno.info.final.txt |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment