Created
June 16, 2011 18:39
-
-
Save arq5x/1029917 to your computer and use it in GitHub Desktop.
Navin Main Processing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################################################
# Index the position-sorted BAM files.
#
# Submits one qsub job per sample. Each job changes into
# $TUMHOME and runs `samtools index` on bam/<sample>.*.bam.
############################################################
export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export STEPNAME="bam-index"
# The submit command is identical for every sample, so build it once.
# NOTE(review): the -M address looks like a redaction placeholder; confirm
# the real address before submitting.
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=2000m:ncpus=1 -N $STEPNAME -m bea -M [email protected]"

# Print the job script for one sample ($1) on stdout.
# The glob stays quoted here, so it is left for the job's shell to expand.
# NOTE(review): samtools index presumably expects a single BAM; confirm the
# glob matches exactly one file per sample.
bam_index_job() {
  local sample=$1
  # '&&' so the tool never runs in the wrong directory if the cd fails.
  printf '%s\n' "cd $TUMHOME && samtools index bam/${sample}.*.bam"
}

# SAMPLES and QSUB are intentionally unquoted: both rely on word splitting.
for sample in $SAMPLES; do
  bam_index_job "$sample" | $QSUB
done
############################################################
# Collect insert size metrics.
#
# Submits one qsub job per sample. Each job runs Picard's
# CollectInsertSizeMetrics.jar on bam/<sample>.bam and writes
# <sample>.bam.isize and <sample>.bam.hist next to the input.
############################################################
export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export STEPNAME="picard-isize"
# The submit command is identical for every sample, so build it once.
# NOTE(review): the -M address looks like a redaction placeholder; confirm
# the real address before submitting.
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=2000m:ncpus=1 -N $STEPNAME -m bea -M [email protected]"

# Print the job script for one sample ($1) on stdout.
# This step uses absolute paths, so the job does not cd anywhere.
picard_isize_job() {
  local sample=$1
  # TUMHOME ends in '/'; strip it so the paths do not contain '//'.
  local bam="${TUMHOME%/}/bam/${sample}.bam"
  printf '%s\n' "java -jar /home/arq5x/cphg-home/bin/CollectInsertSizeMetrics.jar INPUT=${bam} OUTPUT=${bam}.isize H=${bam}.hist STOP_AFTER=10000000"
}

# SAMPLES and QSUB are intentionally unquoted: both rely on word splitting.
for sample in $SAMPLES; do
  picard_isize_job "$sample" | $QSUB
done
############################################################
# Flagstat.
#
# Submits one qsub job per sample. Each job changes into
# $TUMHOME and writes `samtools flagstat` output for
# bam/<sample>.bam to bam/<sample>.bam.flagstat.
############################################################
export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export STEPNAME="flagstat"
# The submit command is identical for every sample, so build it once.
# NOTE(review): the -M address looks like a redaction placeholder; confirm
# the real address before submitting.
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=1 -N $STEPNAME -m bea -M [email protected]"

# Print the job script for one sample ($1) on stdout.
flagstat_job() {
  local sample=$1
  local bam="bam/${sample}.bam"
  # '&&' so the tool never runs in the wrong directory if the cd fails.
  printf '%s\n' "cd $TUMHOME && samtools flagstat ${bam} > ${bam}.flagstat"
}

# SAMPLES and QSUB are intentionally unquoted: both rely on word splitting.
for sample in $SAMPLES; do
  flagstat_job "$sample" | $QSUB
done
############################################################
# Create name sorted BAM files.
#
# Submits one qsub job per sample. Each job changes into
# $TUMHOME and runs `samtools sort -n` on bam/<sample>.bam with
# the output prefix bam/<sample>.namesorted.
############################################################
export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export STEPNAME="namesort"
# The submit command is identical for every sample, so build it once.
# NOTE(review): the -M address looks like a redaction placeholder; confirm
# the real address before submitting.
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=1 -N $STEPNAME -m bea -M [email protected]"

# Print the job script for one sample ($1) on stdout.
namesort_job() {
  local sample=$1
  # '&&' so the tool never runs in the wrong directory if the cd fails.
  printf '%s\n' "cd $TUMHOME && samtools sort -n -m 2000000000 bam/${sample}.bam bam/${sample}.namesorted"
}

# SAMPLES and QSUB are intentionally unquoted: both rely on word splitting.
for sample in $SAMPLES; do
  namesort_job "$sample" | $QSUB
done
############################################################
# Count the number of ends for each pair.
#
# Submits one qsub job per sample. Each job changes into
# $TUMHOME, streams bam/<sample>.namesorted.bam through
# `cut -f 1 | uniq -c`, and writes the result to
# bam/<sample>.namesorted.bam.querycounts.
############################################################
export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export STEPNAME="querycount"
# The submit command is identical for every sample, so build it once.
# NOTE(review): the -M address looks like a redaction placeholder; confirm
# the real address before submitting.
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=2000m:ncpus=4 -N $STEPNAME -m bea -M [email protected]"

# Print the job script for one sample ($1) on stdout.
querycount_job() {
  local sample=$1
  local bam="bam/${sample}.namesorted.bam"
  # '&&' so the pipeline never runs in the wrong directory if the cd fails.
  printf '%s\n' "cd $TUMHOME && samtools view ${bam} | cut -f 1 | uniq -c > ${bam}.querycounts"
}

# SAMPLES and QSUB are intentionally unquoted: both rely on word splitting.
for sample in $SAMPLES; do
  querycount_job "$sample" | $QSUB
done
############################################################
# How many pairs have only one end in the BAM?
#
# Submits one qsub job per sample. Each job changes into
# $TUMHOME and keeps the lines of
# bam/<sample>.namesorted.bam.querycounts whose first field is
# below 2, writing them to the same name with a .lt2 suffix.
############################################################
export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export STEPNAME="querycount"
# The submit command is identical for every sample, so build it once.
# NOTE(review): the -M address looks like a redaction placeholder; confirm
# the real address before submitting.
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=2000m:ncpus=1 -N $STEPNAME -m bea -M [email protected]"

# Print the job script for one sample ($1) on stdout.
querycount_lt2_job() {
  local sample=$1
  local counts="bam/${sample}.namesorted.bam.querycounts"
  # \$1 is escaped so the awk field reference reaches the job unexpanded.
  # '&&' so awk never runs in the wrong directory if the cd fails.
  printf '%s\n' "cd $TUMHOME && awk '\$1<2' ${counts} > ${counts}.lt2"
}

# SAMPLES and QSUB are intentionally unquoted: both rely on word splitting.
for sample in $SAMPLES; do
  querycount_lt2_job "$sample" | $QSUB
done
############################################################
# Create the clipped and discordant BAM files.
#
# Submits one qsub job per sample. Each job changes into
# $TUMHOME and runs `clipper -i` on bam/<sample>.namesorted.bam.
############################################################
#export SAMPLES="T10AA T10AB T10D T10H"
export SAMPLES="T10AA T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export STEPNAME="clipper"
# The submit command is identical for every sample, so build it once.
# NOTE(review): the -M address looks like a redaction placeholder; confirm
# the real address before submitting.
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=4000m:ncpus=1 -N $STEPNAME -m bea -M [email protected]"

# Print the job script for one sample ($1) on stdout.
clipper_job() {
  local sample=$1
  # '&&' so the tool never runs in the wrong directory if the cd fails.
  printf '%s\n' "cd $TUMHOME && clipper -i bam/${sample}.namesorted.bam"
}

# SAMPLES and QSUB are intentionally unquoted: both rely on word splitting.
for sample in $SAMPLES; do
  clipper_job "$sample" | $QSUB
done
############################################################
# Create a FASTA file from the (mostly merged) clipped reads.
#
# Submits one qsub job per sample. Each job changes into
# $TUMHOME and redirects the output of `merger -i` on
# bam/<sample>.namesorted.bam.clip.bam to the same name with a
# .fasta suffix.
############################################################
export SAMPLES="T10AA T10D T10H"
#export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export STEPNAME="merger"
# The submit command is identical for every sample, so build it once.
# NOTE(review): the -M address looks like a redaction placeholder; confirm
# the real address before submitting.
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=4000m:ncpus=1 -N $STEPNAME -m bea -M [email protected]"

# Print the job script for one sample ($1) on stdout.
merger_job() {
  local sample=$1
  local clip_bam="bam/${sample}.namesorted.bam.clip.bam"
  # '&&' so the tool never runs in the wrong directory if the cd fails.
  printf '%s\n' "cd $TUMHOME && merger -i ${clip_bam} > ${clip_bam}.fasta"
}

# SAMPLES and QSUB are intentionally unquoted: both rely on word splitting.
for sample in $SAMPLES; do
  merger_job "$sample" | $QSUB
done
############################################################
# Create FASTQ files from the discordant reads.
#
# Submits one qsub job per sample. Each job changes into
# $TUMHOME and runs bamToFastq on
# bam/<sample>.namesorted.bam.disc.bam, writing the two ends to
# <input>.1.fq and <input>.2.fq.
############################################################
export SAMPLES="T10AA T10D T10H"
#export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
# Was "merger" (copied from the previous step), which made these jobs
# indistinguishable from the merger jobs by name.
export STEPNAME="bamtofastq"
# The submit command is identical for every sample, so build it once.
# NOTE(review): the -M address looks like a redaction placeholder; confirm
# the real address before submitting.
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=4000m:ncpus=1 -N $STEPNAME -m bea -M [email protected]"

# Print the job script for one sample ($1) on stdout.
bam_to_fastq_job() {
  local sample=$1
  local disc_bam="bam/${sample}.namesorted.bam.disc.bam"
  # '&&' so the tool never runs in the wrong directory if the cd fails.
  printf '%s\n' "cd $TUMHOME && bamToFastq -i ${disc_bam} -fq1 ${disc_bam}.1.fq -fq2 ${disc_bam}.2.fq"
}

# SAMPLES and QSUB are intentionally unquoted: both rely on word splitting.
for sample in $SAMPLES; do
  bam_to_fastq_job "$sample" | $QSUB
done
############################################################
# Align the soft-clipped FASTA with BWA-SW for split-read detection.
# Z=10
#
# Submits one qsub job per sample. Each job changes into
# $TUMHOME, runs `bwa bwasw -t 8 -z 10` against $GENOME on
# bam/<sample>.namesorted.bam.clip.bam.fasta, and pipes the
# result through `samtools view -Sb -` into <input>.bam.
############################################################
export SAMPLES="T10AA T10D T10H"
#export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/bwa/hg18_full
export STEPNAME="bwa-sw"
# The submit command is identical for every sample, so build it once.
# NOTE(review): the -M address looks like a redaction placeholder; confirm
# the real address before submitting.
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=10 -N $STEPNAME -m bea -M [email protected]"

# Print the job script for one sample ($1) on stdout.
bwasw_z10_job() {
  local sample=$1
  local fasta="bam/${sample}.namesorted.bam.clip.bam.fasta"
  # '&&' so the pipeline never runs in the wrong directory if the cd fails.
  printf '%s\n' "cd $TUMHOME && bwa bwasw -t 8 -z 10 $GENOME ${fasta} | samtools view -Sb - > ${fasta}.bam"
}

# SAMPLES and QSUB are intentionally unquoted: both rely on word splitting.
for sample in $SAMPLES; do
  bwasw_z10_job "$sample" | $QSUB
done
############################################################
# Align the soft-clipped FASTA with BWA-SW for split-read detection.
# Z=1
#
# Submits one qsub job per sample. Each job changes into
# $TUMHOME, runs `bwa bwasw -t 8` (no -z option) against $GENOME
# on bam/<sample>.namesorted.bam.clip.bam.fasta, and pipes the
# result through `samtools view -Sb -` into <input>.z1.bam.
############################################################
export SAMPLES="T10AA T10D T10H"
#export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/bwa/hg18_full
export STEPNAME="bwa-sw"
# The submit command is identical for every sample, so build it once.
# NOTE(review): the -M address looks like a redaction placeholder; confirm
# the real address before submitting.
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=10 -N $STEPNAME -m bea -M [email protected]"

# Print the job script for one sample ($1) on stdout.
bwasw_z1_job() {
  local sample=$1
  local fasta="bam/${sample}.namesorted.bam.clip.bam.fasta"
  # '&&' so the pipeline never runs in the wrong directory if the cd fails.
  printf '%s\n' "cd $TUMHOME && bwa bwasw -t 8 $GENOME ${fasta} | samtools view -Sb - > ${fasta}.z1.bam"
}

# SAMPLES and QSUB are intentionally unquoted: both rely on word splitting.
for sample in $SAMPLES; do
  bwasw_z1_job "$sample" | $QSUB
done
############################################################
# Align the discordant reads with Novoalign.
#
# Submits one qsub job per sample. Each job changes into
# $TUMHOME, runs novoalign against $GENOME on the two FASTQ
# files bam/<sample>.namesorted.bam.disc.bam.{1,2}.fq, and pipes
# the SAM output through `samtools view -Sb -`.
############################################################
export SAMPLES="T10AA T10D T10H"
#export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/novoalign/hg18_full.k15s2.novoindex
# Was "bwa-sw" (copied from the BWA-SW steps), which mislabelled these jobs.
export STEPNAME="novoalign"
# The submit command is identical for every sample, so build it once.
# NOTE(review): the -M address looks like a redaction placeholder; confirm
# the real address before submitting.
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=24000m:ncpus=10 -N $STEPNAME -m bea -M [email protected]"

# Print the job script for one sample ($1) on stdout.
# NOTE(review): the output path is the same <sample>.namesorted.bam.disc.bam
# that the FASTQ files are named after, so an existing file of that name is
# overwritten. Confirm that this is intended.
novoalign_job() {
  local sample=$1
  local disc_bam="bam/${sample}.namesorted.bam.disc.bam"
  # '&&' so the pipeline never runs in the wrong directory if the cd fails.
  printf '%s\n' "cd $TUMHOME && novoalign -c 8 -d $GENOME -f ${disc_bam}.1.fq ${disc_bam}.2.fq -i 180 36 -r Random -o SAM | samtools view -Sb - > ${disc_bam}"
}

# SAMPLES and QSUB are intentionally unquoted: both rely on word splitting.
for sample in $SAMPLES; do
  novoalign_job "$sample" | $QSUB
done
############################################################
# Sort/index the BWA-SW bams by position.
#
# Submits one qsub job per sample. Each job changes into
# $TUMHOME, position-sorts
# bam/<sample>.namesorted.bam.clip.bam.fasta.z1.bam to the
# prefix <...>.fasta.z1.possrt, then indexes the sorted BAM.
############################################################
export SAMPLES="T10AA T10D T10H"
#export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/bwa/hg18_full
export STEPNAME="bwa-sw"
# The submit command is identical for every sample, so build it once.
# NOTE(review): the -M address looks like a redaction placeholder; confirm
# the real address before submitting.
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=2 -N $STEPNAME -m bea -M [email protected]"

# Print the job script for one sample ($1) on stdout.
possort_z1_job() {
  local sample=$1
  local base="bam/${sample}.namesorted.bam.clip.bam.fasta.z1"
  # `samtools sort <in.bam> <out.prefix>` writes <out.prefix>.bam, so the
  # file to index is ${base}.possrt.bam. The previous text indexed
  # "...fasta.possrt.z1.bam", a name that the sort never produces.
  # '&&' throughout: do not sort in the wrong directory, and do not index
  # after a failed sort.
  printf '%s\n' "cd $TUMHOME && samtools sort -m 2000000000 ${base}.bam ${base}.possrt && samtools index ${base}.possrt.bam"
}

# SAMPLES and QSUB are intentionally unquoted: both rely on word splitting.
for sample in $SAMPLES; do
  possort_z1_job "$sample" | $QSUB
done
############################################################
# Split read BWA-SW to BEDPE.
#
# Submits one qsub job per sample. Each job changes into
# $TUMHOME, streams
# bam/<sample>.namesorted.bam.clip.bam.fasta.z1.bam through
# `splitReadSamToBedpe -i stdin`, and writes <input>.bedpe.
############################################################
export SAMPLES="T10AA T10D T10H"
#export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/bwa/hg18_full
export STEPNAME="bedpe"
# The submit command is identical for every sample, so build it once.
# NOTE(review): the -M address looks like a redaction placeholder; confirm
# the real address before submitting.
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=2000m:ncpus=6 -N $STEPNAME -m bea -M [email protected]"

# Print the job script for one sample ($1) on stdout.
bedpe_job() {
  local sample=$1
  local z1_bam="bam/${sample}.namesorted.bam.clip.bam.fasta.z1.bam"
  # '&&' so the pipeline never runs in the wrong directory if the cd fails.
  printf '%s\n' "cd $TUMHOME && samtools view ${z1_bam} | splitReadSamToBedpe -i stdin > ${z1_bam}.bedpe"
}

# SAMPLES and QSUB are intentionally unquoted: both rely on word splitting.
for sample in $SAMPLES; do
  bedpe_job "$sample" | $QSUB
done
############################################################
# Identify splitters from the BWA-SW alignments.
#
# Submits one qsub job per sample. Each job changes into
# $TUMHOME, keeps the lines of
# bam/<sample>.namesorted.bam.clip.bam.fasta.z1.bam.bedpe whose
# 15th field is at least 25, and pipes them through
# `splitterToBreakpoint -i stdin` into <...>.z1.bam.splitters.
############################################################
export SAMPLES="T10AA T10D T10H"
#export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/bwa/hg18_full
export STEPNAME="splitters"
# The submit command is identical for every sample, so build it once.
# NOTE(review): the -M address looks like a redaction placeholder; confirm
# the real address before submitting.
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=2000m:ncpus=6 -N $STEPNAME -m bea -M [email protected]"

# Print the job script for one sample ($1) on stdout.
splitters_job() {
  local sample=$1
  local z1_bam="bam/${sample}.namesorted.bam.clip.bam.fasta.z1.bam"
  # \$15 is escaped so the awk field reference reaches the job unexpanded.
  # '&&' so the pipeline never runs in the wrong directory if the cd fails.
  printf '%s\n' "cd $TUMHOME && awk '\$15>=25' ${z1_bam}.bedpe | splitterToBreakpoint -i stdin > ${z1_bam}.splitters"
}

# SAMPLES and QSUB are intentionally unquoted: both rely on word splitting.
for sample in $SAMPLES; do
  splitters_job "$sample" | $QSUB
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment