Last active
September 24, 2015 22:47
-
-
Save arq5x/821179 to your computer and use it in GitHub Desktop.
T1D Loci Exome for all 7 pools on the PBS environment
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################################################ | |
# Pair the alignments. | |
# Keep proper, on-target (i.e. +/- 500 bp of a probe) pairs. | |
# Require mapping quality >= 20 | |
############################################################ | |
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/ | |
export STEPNAME=t1d-ex-bwa-par | |
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa | |
export POOLS="pool1-63 pool2-43 pool2-63 pool3-63 pool4-63 pool5-63 pool6-63 pool7-63" | |
export TARGETS=bed/probes.merged.slop1000.hg19.bed | |
for pool in `echo $POOLS` | |
do | |
# -m ea sends an email when job (e)nds or (a)borts | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=3 -N $STEPNAME -m bea -M [email protected]" | |
echo "cd $DIR; | |
bwa sampe -r '@RG\tID:$pool\tSM:$pool' $GENOME bam-hg19/$pool.1.sai bam-hg19/$pool.2.sai fastq/$pool.1.fq fastq/$pool.2.fq \ | |
| samtools view -q 20 -f 2 -Su - \ | |
| intersectBed -abam stdin -b $TARGETS \ | |
> bam-hg19/$pool.conc.on.bam" | $QSUB | |
done | |
############################################################ | |
# Sort and index the alignments. | |
############################################################ | |
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/ | |
export STEPNAME=t1d-ex-srtidx | |
export POOLS="pool1-63 pool2-43 pool2-63 pool3-63 pool4-63 pool5-63 pool6-63 pool7-63" | |
for pool in `echo $POOLS` | |
do | |
# -m ea sends an email when job (e)nds or (a)borts | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=4000m:ncpus=1 -N $STEPNAME -m bea -M [email protected]" | |
echo "cd $DIR; \ | |
samtools sort -m 2000000000 bam-hg19/$pool.conc.on.bam bam-hg19/$pool.conc.on.pos; \ | |
samtools index bam-hg19/$pool.conc.on.pos.bam" | $QSUB | |
done | |
############################################################ | |
# Identify realignment targets. | |
# NEED TO MAKE A GATK SORTED REF GENOME | |
############################################################ | |
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/ | |
export STEPNAME=t1d-targets | |
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa | |
export POOLS="pool1-63 pool2-43 pool2-63 pool3-63 pool4-63 pool5-63 pool6-63 pool7-63" | |
for pool in `echo $POOLS`; | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=4000m:ncpus=3 -N $STEPNAME -m bea -M [email protected]"; | |
echo "cd $DIR; java -Xmx2g \ | |
-jar /home/arq5x/cphg-home/bin/GenomeAnalysisTK.jar \ | |
-T RealignerTargetCreator \ | |
-R $GENOME \ | |
-I bam-hg19/$pool.conc.on.pos.bam \ | |
-o bam-hg19/$pool.conc.on.pos.bam.intervals" | $QSUB; | |
done | |
############################################################# | |
# Call variants with freebayes: INDEL-REALIGNED | |
############################################################# | |
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa | |
export POOLS="pool1-63 pool2-43 pool2-63 pool3-63 pool4-63 pool5-63 pool6-63 pool7-63" | |
export STEPNAME=t1d-freebayes | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=16000m:ncpus=1 -N $STEPNAME -m bea -M [email protected]" | |
export VERSION=2013-01-18 | |
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/ | |
echo "cd $DIR; freebayes -f $GENOME \ | |
-b bam-hg19/pool1-63.conc.on.pos.realigned.bam \ | |
-b bam-hg19/pool2-43.conc.on.pos.realigned.bam \ | |
-b bam-hg19/pool2-63.conc.on.pos.realigned.bam \ | |
-b bam-hg19/pool3-63.conc.on.pos.realigned.bam \ | |
-b bam-hg19/pool4-63.conc.on.pos.realigned.bam \ | |
-b bam-hg19/pool5-63.conc.on.pos.realigned.bam \ | |
-b bam-hg19/pool6-63.conc.on.pos.realigned.bam \ | |
-b bam-hg19/pool7-63.conc.on.pos.realigned.bam \ | |
-p 20 \ | |
-J \ | |
--min-coverage 100 \ | |
--min-base-quality 10 \ | |
--min-alternate-total 5 \ | |
--no-marginals \ | |
-i \ | |
> varCalling/$VERSION/raw.freebayes.rg.realigned.vcf" | $QSUB |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Where? | |
/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/ | |
########################################################################################## | |
# Create symlinks to the original data and rename the lanes by the pools-length sequenced | |
########################################################################################## | |
ln -s ../orig_data/pool1-63/s_7_1_sequence.txt pool1-63.1.fq | |
ln -s ../orig_data/pool1-63/s_7_2_sequence.txt pool1-63.2.fq | |
ln -s ../orig_data/pool2-43/s_5_1_sequence.txt pool2-43.1.fq | |
ln -s ../orig_data/pool2-43/s_5_2_sequence.txt pool2-43.2.fq | |
ln -s ../orig_data/pool2-63/s_8_1_sequence.txt pool2-63.1.fq | |
ln -s ../orig_data/pool2-63/s_8_2_sequence.txt pool2-63.2.fq | |
ln -s ../orig_data/pool3-63/s_3_1_sequence.txt pool3-63.1.fq | |
ln -s ../orig_data/pool3-63/s_3_2_sequence.txt pool3-63.2.fq | |
ln -s ../orig_data/pool4-63/s_4_1_sequence.txt pool4-63.1.fq | |
ln -s ../orig_data/pool4-63/s_4_2_sequence.txt pool4-63.2.fq | |
ln -s ../orig_data/pool5-63/s_5_1_sequence.txt pool5-63.1.fq | |
ln -s ../orig_data/pool5-63/s_5_2_sequence.txt pool5-63.2.fq | |
ln -s ../orig_data/pool6-63/s_6_1_sequence.txt pool6-63.1.fq | |
ln -s ../orig_data/pool6-63/s_6_2_sequence.txt pool6-63.2.fq | |
ln -s ../orig_data/pool7-63/s_7_1_sequence.txt pool7-63.1.fq | |
ln -s ../orig_data/pool7-63/s_7_2_sequence.txt pool7-63.2.fq | |
########################################################################################## | |
# Align the pools to hg18 with BWA | |
########################################################################################## | |
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/ | |
export STEPNAME=t1d-ex-bwa-aln | |
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/gatk/hg18_sorted.fa | |
export POOLS="pool1-63 pool2-43 pool2-63 pool3-63 pool4-63 pool5-63 pool6-63 pool7-63" | |
for pool in `echo $POOLS` | |
do | |
# -m ea sends an email when job (e)nds or (a)borts | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=4000m:ncpus=4 -N $STEPNAME -m bea -M [email protected]" | |
echo "cd $DIR; bwa aln -t 4 $GENOME fastq/$pool.1.fq > bam/$pool.1.sai" | |
echo "cd $DIR; bwa aln -t 4 $GENOME fastq/$pool.2.fq > bam/$pool.2.sai" | |
done | |
########################################################################################## | |
# Align the pools to hg19 with BWA | |
########################################################################################## | |
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/ | |
export STEPNAME=t1d-ex-bwa-aln | |
# KATIE, FIX HG18 to HG19 (/home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa) | |
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/gatk/hg18_sorted.fa | |
export POOLS="pool1-63 pool2-43 pool2-63 pool3-63 pool4-63 pool5-63 pool6-63 pool7-63" | |
for pool in `echo $POOLS` | |
do | |
# -m ea sends an email when job (e)nds or (a)borts | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=4000m:ncpus=4 -N $STEPNAME -m bea -M [email protected]" | |
echo "cd $DIR; bwa aln -t 4 $GENOME fastq/$pool.1.fq > bam-hg19/$pool.1.sai" | |
echo "cd $DIR; bwa aln -t 4 $GENOME fastq/$pool.2.fq > bam-hg19/$pool.2.sai" | |
done | |
############################################################ | |
# Pair the alignments. | |
# Keep proper, on-target (i.e. +/- 500 bp of a probe) pairs. | |
# Require mapping quality >= 20 | |
############################################################ | |
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/ | |
export STEPNAME=t1d-ex-bwa-par | |
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/gatk/hg18_sorted.fa | |
export POOLS="pool1-63 pool2-43 pool2-63 pool3-63 pool4-63 pool5-63 pool6-63 pool7-63" | |
export TARGETS=bed/probes.merged.slop1000.bed | |
for pool in `echo $POOLS` | |
do | |
# -m ea sends an email when job (e)nds or (a)borts | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=3 -N $STEPNAME -m bea -M [email protected]" | |
echo "cd $DIR; | |
bwa sampe -r '@RG\tID:$pool\tSM:$pool' $GENOME bam/$pool.1.sai bam/$pool.2.sai fastq/$pool.1.fq fastq/$pool.2.fq \ | |
| samtools view -q 20 -f 2 -Su - \ | |
| intersectBed -abam stdin -b $TARGETS \ | |
> bam/$pool.conc.on.bam" | $QSUB | |
done | |
############################################################ | |
# Sort and index the alignments. | |
############################################################ | |
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/ | |
export STEPNAME=t1d-ex-srtidx | |
export POOLS="pool1-63 pool2-43 pool2-63 pool3-63 pool4-63 pool5-63 pool6-63 pool7-63" | |
for pool in `echo $POOLS` | |
do | |
# -m ea sends an email when job (e)nds or (a)borts | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=4000m:ncpus=1 -N $STEPNAME -m bea -M [email protected]" | |
echo "cd $DIR; \ | |
samtools sort -m 2000000000 bam/$pool.conc.on.bam bam/$pool.conc.on.pos; \ | |
samtools index bam/$pool.conc.on.pos.bam" | $QSUB | |
done | |
############################################################ | |
# Identify realignment targets. | |
# NEED TO MAKE A GATK SORTED REF GENOME | |
############################################################ | |
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/ | |
export STEPNAME=t1d-targets | |
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/gatk/hg18_sorted.fa | |
export POOLS="pool1-63 pool2-43 pool2-63 pool3-63 pool4-63 pool5-63 pool6-63 pool7-63" | |
for pool in `echo $POOLS`; | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=16000m:ncpus=3 -N $STEPNAME -m bea -M [email protected]"; | |
echo "cd $DIR; java -Xmx10g \ | |
-jar /home/arq5x/cphg-home/bin/GenomeAnalysisTK.jar \ | |
-T RealignerTargetCreator \ | |
-R $GENOME \ | |
-I bam/$pool.conc.on.pos.bam \ | |
-o bam/$pool.conc.on.pos.bam.intervals" | $QSUB; | |
done | |
############################################################ | |
# Use GATK to realign the BAMs at the suspicious targets. | |
############################################################ | |
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/ | |
export STEPNAME=t1d-targets | |
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/gatk/hg18_sorted.fa | |
export POOLS="pool1-63 pool2-43 pool2-63 pool3-63 pool4-63 pool5-63 pool6-63 pool7-63" | |
for pool in `echo $POOLS`; | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=16000m:ncpus=4 -N $STEPNAME -m bea -M [email protected]"; | |
echo "cd $DIR; java -Xmx12g -Djava.io.tmpdir=$DIR/bam -jar /home/arq5x/cphg-home/bin/GenomeAnalysisTK.jar \ | |
-T IndelRealigner \ | |
--num_threads 1 \ | |
-R $GENOME \ | |
-targetIntervals bam/$pool.conc.on.pos.bam.intervals \ | |
-I bam/$pool.conc.on.pos.bam \ | |
-o bam/$pool.conc.on.pos.realigned.bam" | $QSUB; | |
done | |
############################################################ | |
# Use Picard to mark duplicates. | |
############################################################ | |
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/ | |
export STEPNAME=t1d-targets | |
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/gatk/hg18_sorted.fa | |
export POOLS="pool1-63 pool2-43 pool2-63 pool3-63 pool4-63 pool5-63 pool6-63 pool7-63" | |
for pool in `echo $POOLS`; | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=2 -N $STEPNAME -m bea -M [email protected]"; | |
echo "cd $DIR; java -Xmx4g -jar /home/arq5x/cphg-home/bin/MarkDuplicates.jar \ | |
INPUT=bam/$pool.conc.on.pos.realigned.bam \ | |
OUTPUT=bam/$pool.conc.on.pos.realigned.dedup.bam \ | |
TMP_DIR=$pool/bam \ | |
METRICS_FILE=bam/$pool.markdup_metrics" | $QSUB; | |
done | |
############################################################ | |
# Index the realigned and deduped BAMs. | |
############################################################ | |
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/ | |
export STEPNAME=t1d-ex-srtidx | |
export POOLS="pool1-63 pool2-43 pool2-63 pool3-63 pool4-63 pool5-63 pool6-63 pool7-63" | |
for pool in `echo $POOLS` | |
do | |
# -m ea sends an email when job (e)nds or (a)borts | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=4000m:ncpus=1 -N $STEPNAME -m bea -M [email protected]" | |
echo "cd $DIR; samtools index bam/$pool.conc.on.pos.realigned.dedup.bam" | $QSUB | |
done | |
############################################################# | |
# Call variants with freebayes: INDEL-REALIGNED | |
############################################################# | |
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/bwa/hg18_full.fa | |
export POOLS="pool1-63 pool2-43 pool2-63 pool3-63 pool4-63 pool5-63 pool6-63 pool7-63" | |
export STEPNAME=t1d-freebayes | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=16000m:ncpus=1 -N $STEPNAME -m bea -M [email protected]" | |
export VERSION=2011-04-04 | |
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/ | |
echo "cd $DIR; freebayes -f $GENOME \ | |
-b bam/pool1-63.conc.on.pos.realigned.bam \ | |
-b bam/pool2-43.conc.on.pos.realigned.bam \ | |
-b bam/pool2-63.conc.on.pos.realigned.bam \ | |
-b bam/pool3-63.conc.on.pos.realigned.bam \ | |
-b bam/pool4-63.conc.on.pos.realigned.bam \ | |
-b bam/pool5-63.conc.on.pos.realigned.bam \ | |
-b bam/pool6-63.conc.on.pos.realigned.bam \ | |
-b bam/pool7-63.conc.on.pos.realigned.bam \ | |
-p 20 \ | |
-J \ | |
--min-coverage 100 \ | |
--min-base-quality 10 \ | |
--min-alternate-total 5 \ | |
--no-marginals \ | |
-i \ | |
> varCalling/$VERSION/raw.freebayes.rg.realigned.vcf" | $QSUB | |
############################################################################# | |
# Cull the raw VCF file to those variants that are within 1000 bp of a target | |
############################################################################# | |
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/varCalling | |
export BED=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/bed | |
cd $DIR; intersectBed -a raw.freebayes.rg.vcf -b $BED/probes.merged.slop1000.bed > raw.freebayes.rg.on.slop1000.vcf | |
############################################################################################## | |
# Only keep variants with a total read depth of at least 50 and at least 5 alternate alleles. | |
############################################################################################## | |
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/varCalling | |
cd $DIR; cat raw.freebayes.rg.on.slop1000.vcf | ./vcfFilter.pl -d 100 -a 10 -c freebayes > raw.freebayes.rg.on.slop1000.depth100.alt10.vcf | |
############################################################################################## | |
# Convert VCF to annovar format and carry along the extra info. | |
############################################################################################## | |
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/varCalling | |
cd $DIR; convert2annovar.pl --format vcf4 \ | |
--includeinfo \ | |
raw.freebayes.rg.on.slop1000.depth100.alt10.vcf \ | |
> raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar | |
############################################################################################## | |
# Annotate the variants by gene. | |
############################################################################################## | |
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/varCalling | |
export ANNOVAR_PATH=/home/arq5x/cphg-home/bin/annovar | |
cd $DIR; annotate_variation.pl -buildver hg18 raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar $ANNOVAR_PATH/humandb/ | |
############################################################################################## | |
# Download SIFT annotations for ANNOVAR. | |
# NOTE: No need to do this every time. | |
############################################################################################## | |
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/varCalling | |
export ANNOVAR_PATH=/home/arq5x/cphg-home/bin/annovar | |
annotate_variation.pl -downdb avsift $ANNOVAR_PATH/humandb/ | |
############################################################################################## | |
# Annotate the variants by SIFT. | |
############################################################################################## | |
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/varCalling | |
export ANNOVAR_PATH=/home/arq5x/cphg-home/bin/annovar | |
export STEPNAME=anno_sift | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=2000m:ncpus=1 -N $STEPNAME -m bea -M [email protected]" | |
echo "cd $DIR; annotate_variation.pl -sift_threshold 0 -filter -dbtype avsift \ | |
raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar \ | |
$ANNOVAR_PATH/humandb/" | $QSUB | |
############################################################################################## | |
# Standardize (fudge) the ANNOVAR output. | |
############################################################################################## | |
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/varCalling | |
export ANNOVAR_PATH=/home/arq5x/cphg-home/bin/annovar | |
cd $DIR | |
awk '{OFS=""; print "avsift\tfiltered", "\t", $0}' raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.hg18_avsift_filtered > raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.hg18_avsift_filtered.corrected | |
mv raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.hg18_avsift_filtered.corrected raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.hg18_avsift_filtered | |
cat raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.hg18_avsift_filtered \ | |
raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.hg18_avsift_dropped \ | |
> raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.hg18_avsift_all | |
sort -k3,3 -k4,4n raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.hg18_avsift_all > raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.hg18_avsift_all.sorted | |
sort -k3,3 -k4,4n raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.variant_function > raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.variant_function.sorted | |
mv raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.variant_function.sorted raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.variant_function | |
mv raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.hg18_avsift_all.sorted raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.hg18_avsift_all | |
paste raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.hg18_avsift_all raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.variant_function | awk '{OFS="\t"; print $3,$4-1,$5,$6,$7,$8,$1,$2,$11,$12}' > raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.sift_and_annovar | |
grep -v UNKNOWN raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.exonic_variant_function | \ | |
awk '{OFS="\t"; print $5,$6-1,$7,$8,$9,$2,$4}' > raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.sift_and_annovar.bed | |
############################################################################################## | |
# Bring in the synonymous/nonsynonymous annotations from | |
# the annovar "exonic_variant_function" file. | |
############################################################################################## | |
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/varCalling | |
cd $DIR | |
intersectBed -a raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.sift_and_annovar -b raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.sift_and_annovar.bed -wa -wb | cut -f 1-10,15,16 > raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.sift_and_annovar_exonic | |
intersectBed -a raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.sift_and_annovar -b raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.sift_and_annovar.bed -v | awk '{OFS=""; print $0,"\tnone\tnone"}' > raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.sift_and_annovar_intronic | |
cat raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.sift_and_annovar_exonic raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.sift_and_annovar_intronic | sort -k1,1 -k2,2n > raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.sift_and_annovar_all.bed | |
############################################################################################## | |
# FIX | |
# Add the genotypes and other information from the GATK VCF file. | |
############################################################################################## | |
# Annotate the SNPs that are already known via dbSNP | |
export DBSNP=/home/arq5x/cphg-home/shared/annotations/hg18/dbSNP/dbSNP.131.hg19.maphg18 | |
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/varCalling | |
cd $DIR | |
intersectBed -a raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.sift_and_annovar_all.bed -b raw.freebayes.rg.on.slop1000.depth100.alt10.vcf -wa -wb | cut -f 1-12,20,22-100 | ./prettyUp.pl | windowBed -w 5 -a stdin -b $DBSNP | groupBy -g 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24 -c 28 -o collapse > all.dbSnp.rg.out | |
# Annotate the SNPs that are novel w.r.t. dbSNP | |
intersectBed -a raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.sift_and_annovar_all.bed -b raw.freebayes.rg.on.slop1000.depth100.alt10.vcf -wa -wb | cut -f 1-12,20,22-100 | ./prettyUp.pl | windowBed -v -w 5 -a stdin -b $DBSNP | awk '{print $0,"\tnovel"}' > all.no_dbSnp.rg.out | |
# Combine the known and novel into a single file | |
cat all.dbSnp.rg.out all.no_dbSnp.rg.out > all.rg.out | |
# Try to identify regions that are likely to be alignment artifacts. | |
mergeBed -i all.rg.out -d 6 -n | awk '$4>=3' > alignment.artifacts.bed | |
# Add a flag to the end of each variant that indicates whether or not we should be suspicious of the variant. | |
intersectBed -a all.rg.out -b alignment.artifacts.bed -c > all.rg.flagged.out |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment