clintval · March 7, 2018 23:10
diff --git a/dcs-pipeline-template.sh b/dcs-pipeline-template.sh
 #!/bin.sh
 # request Bourne shell as shell for job
 #$ -S /bin/sh
 #$ -cwd
 #$ -V
 #$ -m e
 #$ -pe whole_nodes 7
 #$ -M [email protected]
 ###############################
 module add bwa/0.6.1
 module add samtools/0.1.19
 module add python/2.7.2
 module add numpy/1.6.1
 module add biopython/1.64
 module add pysam/0.7.4
 module add jre/1.7.0-25

 # DS bash script
 # Version 2.1


 #Library Prep (nt):
 # barcode: 8
 # spacer:  5
 # rlength: 150
 # *rlength: 150 - 8 - 5 = 137

 #Post-Processing:
 # Left-side Clip: 4
 # Right-side Clip: 21
 # rlength: 137 - 4 - 21 = 112

 # Step 1: Setup variables for run:
 clear

 # Set up error checking
 # Stop on any error
 set -e
 # Stop on an error inside a pipeline
 # set -o pipefail # not permissable on ROUS server
 # Throw an error on calling an unassigned variable
 set -u

 #DEFAULTS
 DSpath=''
 alignRef=''
 runIdentifier=''
 read1in=seq1.fq
 read2in=seq2.fq
 iSize=-1
 minMem=3
 maxMem=1000
 cutOff=0.7
 nCutOff=0.3
 readLength=150
 barcodeLength=8
 spacerLength=5
 filtersSet='os'
 readTypes='dpm'
 repFilt=9
 readOut=1000000

 #POST_PROCESSING_PATHS
 picard=~/picard-tools-1.119
 GATK=~/GATK

 #NONDEFAULTS

 #FINAL_READ_LENGTH 150-8-5 = 137
 readLength=$((readLength-barcodeLength-spacerLength))

 #LOG_FILE_NAME
 logFile=${runIdentifier}.log.txt

 #Export all variables
 export DSpath
 export alignRef
 export runIdentifier
 export read1in
 export read2in
 export iSize
 export minMem
 export maxMem
 export cutOff
 export nCutOff
 export readLength
 export barcodeLength
 export spacerLength
 export filtersSet
 export readTypes
 export repFilt
 export readOut
 export picard
 export GATK

 # Print out options used to log file
 touch $logFile
 echo "Run identifier: " $runIdentifier | tee -a ${logFile}
 echo "Program path: " $DSpath | tee -a ${logFile}
 echo "Reference genome: " $alignRef | tee -a ${logFile}
 echo "Barcode length: " $barcodeLength | tee -a ${logFile}
 echo "Spacer length: " $spacerLength | tee -a ${logFile}
 echo "Post-tag_to_header read length: " $readLength | tee -a ${logFile}
 echo "Repetitive tag filter length: " $repFilt | tee -a ${logFile}
 echo "Minimum family size: " $minMem | tee -a ${logFile}
 echo "Maximum family size: " $maxMem | tee -a ${logFile}
 echo "Consensus cutoff: " $cutOff | tee -a ${logFile}
 echo "Consensus N cutoff: " $nCutOff | tee -a ${logFile}
 echo "Read types: " $readTypes | tee -a ${logFile}
 echo "Filters: " $filtersSet | tee -a ${logFile}
 echo "" | tee -a ${logFile}

 #  Step 2: Run tag_to_header.py on input files

 echo "Starting Run" | tee -a ${logFile}
 echo "tag_to_header starting"  | tee -a ${logFile}
 date | tee -a ${logFile}
 echo "" | tee -a ${logFile}

 python ${DSpath}/tag_to_header.py \
    --infile1 $read1in \
    --infile2 $read2in \
    --outfile1 ${runIdentifier}.seq1.fq.smi \
    --outfile2 ${runIdentifier}.seq2.fq.smi \
    --barcode_length $barcodeLength \
    --spacer_length $spacerLength \
    --read_out $readOut

 # Step 3: Align sequences

 echo "Aligning with BWA" | tee -a ${logFile}
 date | tee -a ${logFile}

 bwa aln $alignRef ${runIdentifier}.seq1.fq.smi > ${runIdentifier}.seq1.aln
 bwa aln $alignRef ${runIdentifier}.seq2.fq.smi > ${runIdentifier}.seq2.aln
 bwa sampe \
    -s $alignRef \
    ${runIdentifier}.seq1.aln \
    ${runIdentifier}.seq2.aln \
    ${runIdentifier}.seq1.fq.smi \
    ${runIdentifier}.seq2.fq.smi \
    > ${runIdentifier}.pe.sam

 # Step 4: Sort aligned sequences
 echo "Sorting aligned sequences" | tee -a ${logFile}
 date | tee -a ${logFile}

 samtools view -Sbu ${runIdentifier}.pe.sam \
    | samtools sort - ${runIdentifier}.pe.sort

 # Step 5: Run Consensus Maker
 echo "Starting Consensus Maker" | tee -a ${logFile}
 date | tee -a ${logFile}

 python ${DSpath}/ConsensusMaker.py \
    --infile ${runIdentifier}.pe.sort.bam \
    --tagfile ${runIdentifier}.pe.tagcounts \
    --outfile ${runIdentifier}.sscs.bam \
    --minmem $minMem \
    --maxmem $maxMem \
    --readlength $readLength \
    --cutoff $cutOff \
    --Ncutoff $nCutOff \
    --read_type $readTypes \
    --filt $filtersSet \
    --isize $iSize

 # Step 6: Sort SSCSs
 echo "Sorting SSCSs" | tee -a ${logFile}
 date | tee -a ${logFile}

 samtools view -bu ${runIdentifier}.sscs.bam \
    | samtools sort - ${runIdentifier}.sscs.sort.bam

 # Step 7: Index sorted SSCSs
 echo "Indexing sorted SSCSs" | tee -a ${logFile}
 date | tee -a ${logFile}

 samtools index ${runIdentifier}.sscs.sort.bam

 # Step 8: Run Duplex Maker
 echo "Starting Duplex Maker" | tee -a ${logFile}
 date  | tee -a ${logFile}

 python ${DSpath}/DuplexMaker.py \
    --infile ${runIdentifier}.sscs.sort.bam \
    --outfile ${runIdentifier}.dcs.bam \
    --Ncutoff $nCutOff \
    --readlength $readLength \
    --barcode_length $barcodeLength

 # Step 9: Align DCSs
 echo "Aligning DCSs" | tee -a ${logFile}
 date | tee -a ${logFile}

 bwa aln $alignRef ${runIdentifier}.dcs.r1.fq > ${runIdentifier}.dcs.r1.aln
 bwa aln $alignRef ${runIdentifier}.dcs.r2.fq > ${runIdentifier}.dcs.r2.aln

 bwa sampe \
    -s $alignRef ${runIdentifier}.dcs.r1.aln \
    ${runIdentifier}.dcs.r2.aln ${runIdentifier}.dcs.r1.fq \
    ${runIdentifier}.dcs.r2.fq \
    > ${runIdentifier}.dcs.sam

 # Step 10: Sort aligned DCSs
 echo "Sorting aligned DCSs" | tee -a ${logFile}
 date | tee -a ${logFile}

 samtools view -Sbu ${runIdentifier}.dcs.sam \
    | samtools sort - ${runIdentifier}.dcs.aln.sort

 # Step 11: Index sorted DCSs
 echo "Indexing sorted DCSs" | tee -a ${logFile}
 date | tee -a ${logFile}

 samtools index ${runIdentifier}.dcs.aln.sort.bam

 echo "Finished with run " $runIdentifier \
    | tee -a ${logFile}

 date | tee -a ${logFile}


 echo "Attempting post-processing for: " $runIdentifier \
    | tee -a ${logFile}

 date | tee -a ${logFile}

 ###############################################################################
 # POST_PROCESSING
 ###############################################################################

 # Setup
 processFile=${runIdentifier}.dcs.aln.sort.bam
 minDepth=100
 minClonality=0
 maxClonality=0.01

 export processFile
 export minDepth
 export minClonality
 export maxClonality

 echo "File for processing: " $processFile | tee -a ${logFile}
 echo "Minimum Depth: " $minDepth | tee -a ${logFile}
 echo "Minimum Clonality: " $minClonality | tee -a ${logFile}
 echo "Maximum Clonality: " $maxClonality | tee -a ${logFile}

 # Filter for mapping reads
 echo "Filtering" | tee -a ${logFile}
 samtools view \
    -F 4 \
    -b ${runIdentifier}.dcs.aln.sort.bam \
    > ${runIdentifier}.dcs.filt.bam

 # Clipping final file
 echo "Clipping Final File" | tee -a ${logFile}
 java -jar $picard/AddOrReplaceReadGroups.jar \
    INPUT=${runIdentifier}.dcs.filt.bam \
    OUTPUT=${runIdentifier}.dcs.filt.readgroups.bam \
    RGLB=UW \
    RGPL=Illumina \
    RGPU=ATATAT \
    RGSM=default

 samtools index ${runIdentifier}.dcs.filt.readgroups.bam

 java -Xmx2g -jar $gaTK/GenomeAnalysisTK.jar \
    -T RealignerTargetCreator \
    -R $alignRef \
    -I ${runIdentifier}.dcs.filt.readgroups.bam \
    -o ${runIdentifier}.dcs.filt.readgroups.intervals

 java -Xmx2g -jar $gaTK/GenomeAnalysisTK.jar \
    -T IndelRealigner \
    -R $alignRef \
    -I ${runIdentifier}.dcs.filt.readgroups.bam \
    -targetIntervals ${runIdentifier}.dcs.filt.readgroups.intervals \
    -o ${runIdentifier}.dcs.filt.readgroups.realign.bam

 java -Xmx2g -jar $gaTK/GenomeAnalysisTK.jar \
    -T ClipReads \
    -I ${runIdentifier}.dcs.filt.readgroups.realign.bam \
    -o ${runIdentifier}.dcs.filt.readgroups.clipped.bam \
    -R $alignRef \
    --cyclesToTrim "1-4,117-137" \
    --clipRepresentation SOFTCLIP_BASES

 # 137-4-21 = 112

 # Generating stats from final file

 # -B (--no-BAQ) Disable per-Base Alignment Quality... Qualities exist as ASCII J
 # -A (--count-orphans) Do not discard anmalous read pairs
 # -d 500000 (--max-depth INT)

 samtools mpileup \
    -B \
    -A \
    -d 500000 \
    -f $alignRef \
    ${runIdentifier}.dcs.filt.readgroups.clipped.bam \
    | tee ${runIdentifier}.dcs.pileup \
    | python ${DSpath}/CountMuts.py \
    -d $minDepth \
    -c $minClonality \
    -C $maxClonality \
    | tee ${runIdentifier}.dcs.aln.sort.bam.d${minDepth}-c${minClonality}-${maxClonality}.unique.countmuts

 cat ${runIdentifier}.dcs.final.bam.pileup \
    | python ${DSpath}/CountMuts.py \
    -d $minDepth \
    -c $minClonality \
    -C $maxClonality \
    > ${runIdentifier}.dcs.unique.countmuts

 python ${DSpath}/mut-position.py \
    -i ${runIdentifier}.dcs.final.bam.pileup \
    -o ${runIdentifier}.dcs.aln.sort.bam.d${minDepth}-c${minClonality}-${maxClonality}.mutpos \
    -d $minDepth \
    -c $minClonality \
    -C $maxClonality

 echo "Finished post-processing for: " $runIdentifier \
    | tee -a ${logFile}

 date | tee -a ${logFile}
	#!/bin.sh
	# request Bourne shell as shell for job
	#$ -S /bin/sh
	#$ -cwd
	#$ -V
	#$ -m e
	#$ -pe whole_nodes 7
	#$ -M [email protected]
	###############################
	module add bwa/0.6.1
	module add samtools/0.1.19
	module add python/2.7.2
	module add numpy/1.6.1
	module add biopython/1.64
	module add pysam/0.7.4
	module add jre/1.7.0-25

	# DS bash script
	# Version 2.1


	#Library Prep (nt):
	# barcode: 8
	# spacer: 5
	# rlength: 150
	# *rlength: 150 - 8 - 5 = 137

	#Post-Processing:
	# Left-side Clip: 4
	# Right-side Clip: 21
	# rlength: 137 - 4 - 21 = 112

	# Step 1: Setup variables for run:
	clear

	# Set up error checking
	# Stop on any error
	set -e
	# Stop on an error inside a pipeline
	# set -o pipefail # not permissable on ROUS server
	# Throw an error on calling an unassigned variable
	set -u

	#DEFAULTS
	DSpath=''
	alignRef=''
	runIdentifier=''
	read1in=seq1.fq
	read2in=seq2.fq
	iSize=-1
	minMem=3
	maxMem=1000
	cutOff=0.7
	nCutOff=0.3
	readLength=150
	barcodeLength=8
	spacerLength=5
	filtersSet='os'
	readTypes='dpm'
	repFilt=9
	readOut=1000000

	#POST_PROCESSING_PATHS
	picard=~/picard-tools-1.119
	GATK=~/GATK

	#NONDEFAULTS

	#FINAL_READ_LENGTH 150-8-5 = 137
	readLength=$((readLength-barcodeLength-spacerLength))

	#LOG_FILE_NAME
	logFile=${runIdentifier}.log.txt

	#Export all variables
	export DSpath
	export alignRef
	export runIdentifier
	export read1in
	export read2in
	export iSize
	export minMem
	export maxMem
	export cutOff
	export nCutOff
	export readLength
	export barcodeLength
	export spacerLength
	export filtersSet
	export readTypes
	export repFilt
	export readOut
	export picard
	export GATK

	# Print out options used to log file
	touch $logFile
	echo "Run identifier: " $runIdentifier \| tee -a ${logFile}
	echo "Program path: " $DSpath \| tee -a ${logFile}
	echo "Reference genome: " $alignRef \| tee -a ${logFile}
	echo "Barcode length: " $barcodeLength \| tee -a ${logFile}
	echo "Spacer length: " $spacerLength \| tee -a ${logFile}
	echo "Post-tag_to_header read length: " $readLength \| tee -a ${logFile}
	echo "Repetitive tag filter length: " $repFilt \| tee -a ${logFile}
	echo "Minimum family size: " $minMem \| tee -a ${logFile}
	echo "Maximum family size: " $maxMem \| tee -a ${logFile}
	echo "Consensus cutoff: " $cutOff \| tee -a ${logFile}
	echo "Consensus N cutoff: " $nCutOff \| tee -a ${logFile}
	echo "Read types: " $readTypes \| tee -a ${logFile}
	echo "Filters: " $filtersSet \| tee -a ${logFile}
	echo "" \| tee -a ${logFile}

	# Step 2: Run tag_to_header.py on input files

	echo "Starting Run" \| tee -a ${logFile}
	echo "tag_to_header starting" \| tee -a ${logFile}
	date \| tee -a ${logFile}
	echo "" \| tee -a ${logFile}

	python ${DSpath}/tag_to_header.py \
	--infile1 $read1in \
	--infile2 $read2in \
	--outfile1 ${runIdentifier}.seq1.fq.smi \
	--outfile2 ${runIdentifier}.seq2.fq.smi \
	--barcode_length $barcodeLength \
	--spacer_length $spacerLength \
	--read_out $readOut

	# Step 3: Align sequences

	echo "Aligning with BWA" \| tee -a ${logFile}
	date \| tee -a ${logFile}

	bwa aln $alignRef ${runIdentifier}.seq1.fq.smi > ${runIdentifier}.seq1.aln
	bwa aln $alignRef ${runIdentifier}.seq2.fq.smi > ${runIdentifier}.seq2.aln
	bwa sampe \
	-s $alignRef \
	${runIdentifier}.seq1.aln \
	${runIdentifier}.seq2.aln \
	${runIdentifier}.seq1.fq.smi \
	${runIdentifier}.seq2.fq.smi \
	> ${runIdentifier}.pe.sam

	# Step 4: Sort aligned sequences
	echo "Sorting aligned sequences" \| tee -a ${logFile}
	date \| tee -a ${logFile}

	samtools view -Sbu ${runIdentifier}.pe.sam \
	\| samtools sort - ${runIdentifier}.pe.sort

	# Step 5: Run Consensus Maker
	echo "Starting Consensus Maker" \| tee -a ${logFile}
	date \| tee -a ${logFile}

	python ${DSpath}/ConsensusMaker.py \
	--infile ${runIdentifier}.pe.sort.bam \
	--tagfile ${runIdentifier}.pe.tagcounts \
	--outfile ${runIdentifier}.sscs.bam \
	--minmem $minMem \
	--maxmem $maxMem \
	--readlength $readLength \
	--cutoff $cutOff \
	--Ncutoff $nCutOff \
	--read_type $readTypes \
	--filt $filtersSet \
	--isize $iSize

	# Step 6: Sort SSCSs
	echo "Sorting SSCSs" \| tee -a ${logFile}
	date \| tee -a ${logFile}

	samtools view -bu ${runIdentifier}.sscs.bam \
	\| samtools sort - ${runIdentifier}.sscs.sort.bam

	# Step 7: Index sorted SSCSs
	echo "Indexing sorted SSCSs" \| tee -a ${logFile}
	date \| tee -a ${logFile}

	samtools index ${runIdentifier}.sscs.sort.bam

	# Step 8: Run Duplex Maker
	echo "Starting Duplex Maker" \| tee -a ${logFile}
	date \| tee -a ${logFile}

	python ${DSpath}/DuplexMaker.py \
	--infile ${runIdentifier}.sscs.sort.bam \
	--outfile ${runIdentifier}.dcs.bam \
	--Ncutoff $nCutOff \
	--readlength $readLength \
	--barcode_length $barcodeLength

	# Step 9: Align DCSs
	echo "Aligning DCSs" \| tee -a ${logFile}
	date \| tee -a ${logFile}

	bwa aln $alignRef ${runIdentifier}.dcs.r1.fq > ${runIdentifier}.dcs.r1.aln
	bwa aln $alignRef ${runIdentifier}.dcs.r2.fq > ${runIdentifier}.dcs.r2.aln

	bwa sampe \
	-s $alignRef ${runIdentifier}.dcs.r1.aln \
	${runIdentifier}.dcs.r2.aln ${runIdentifier}.dcs.r1.fq \
	${runIdentifier}.dcs.r2.fq \
	> ${runIdentifier}.dcs.sam

	# Step 10: Sort aligned DCSs
	echo "Sorting aligned DCSs" \| tee -a ${logFile}
	date \| tee -a ${logFile}

	samtools view -Sbu ${runIdentifier}.dcs.sam \
	\| samtools sort - ${runIdentifier}.dcs.aln.sort

	# Step 11: Index sorted DCSs
	echo "Indexing sorted DCSs" \| tee -a ${logFile}
	date \| tee -a ${logFile}

	samtools index ${runIdentifier}.dcs.aln.sort.bam

	echo "Finished with run " $runIdentifier \
	\| tee -a ${logFile}

	date \| tee -a ${logFile}


	echo "Attempting post-processing for: " $runIdentifier \
	\| tee -a ${logFile}

	date \| tee -a ${logFile}

	###############################################################################
	# POST_PROCESSING
	###############################################################################

	# Setup
	processFile=${runIdentifier}.dcs.aln.sort.bam
	minDepth=100
	minClonality=0
	maxClonality=0.01

	export processFile
	export minDepth
	export minClonality
	export maxClonality

	echo "File for processing: " $processFile \| tee -a ${logFile}
	echo "Minimum Depth: " $minDepth \| tee -a ${logFile}
	echo "Minimum Clonality: " $minClonality \| tee -a ${logFile}
	echo "Maximum Clonality: " $maxClonality \| tee -a ${logFile}

	# Filter for mapping reads
	echo "Filtering" \| tee -a ${logFile}
	samtools view \
	-F 4 \
	-b ${runIdentifier}.dcs.aln.sort.bam \
	> ${runIdentifier}.dcs.filt.bam

	# Clipping final file
	echo "Clipping Final File" \| tee -a ${logFile}
	java -jar $picard/AddOrReplaceReadGroups.jar \
	INPUT=${runIdentifier}.dcs.filt.bam \
	OUTPUT=${runIdentifier}.dcs.filt.readgroups.bam \
	RGLB=UW \
	RGPL=Illumina \
	RGPU=ATATAT \
	RGSM=default

	samtools index ${runIdentifier}.dcs.filt.readgroups.bam

	java -Xmx2g -jar $gaTK/GenomeAnalysisTK.jar \
	-T RealignerTargetCreator \
	-R $alignRef \
	-I ${runIdentifier}.dcs.filt.readgroups.bam \
	-o ${runIdentifier}.dcs.filt.readgroups.intervals

	java -Xmx2g -jar $gaTK/GenomeAnalysisTK.jar \
	-T IndelRealigner \
	-R $alignRef \
	-I ${runIdentifier}.dcs.filt.readgroups.bam \
	-targetIntervals ${runIdentifier}.dcs.filt.readgroups.intervals \
	-o ${runIdentifier}.dcs.filt.readgroups.realign.bam

	java -Xmx2g -jar $gaTK/GenomeAnalysisTK.jar \
	-T ClipReads \
	-I ${runIdentifier}.dcs.filt.readgroups.realign.bam \
	-o ${runIdentifier}.dcs.filt.readgroups.clipped.bam \
	-R $alignRef \
	--cyclesToTrim "1-4,117-137" \
	--clipRepresentation SOFTCLIP_BASES

	# 137-4-21 = 112

	# Generating stats from final file

	# -B (--no-BAQ) Disable per-Base Alignment Quality... Qualities exist as ASCII J
	# -A (--count-orphans) Do not discard anmalous read pairs
	# -d 500000 (--max-depth INT)

	samtools mpileup \
	-B \
	-A \
	-d 500000 \
	-f $alignRef \
	${runIdentifier}.dcs.filt.readgroups.clipped.bam \
	\| tee ${runIdentifier}.dcs.pileup \
	\| python ${DSpath}/CountMuts.py \
	-d $minDepth \
	-c $minClonality \
	-C $maxClonality \
	\| tee ${runIdentifier}.dcs.aln.sort.bam.d${minDepth}-c${minClonality}-${maxClonality}.unique.countmuts

	cat ${runIdentifier}.dcs.final.bam.pileup \
	\| python ${DSpath}/CountMuts.py \
	-d $minDepth \
	-c $minClonality \
	-C $maxClonality \
	> ${runIdentifier}.dcs.unique.countmuts

	python ${DSpath}/mut-position.py \
	-i ${runIdentifier}.dcs.final.bam.pileup \
	-o ${runIdentifier}.dcs.aln.sort.bam.d${minDepth}-c${minClonality}-${maxClonality}.mutpos \
	-d $minDepth \
	-c $minClonality \
	-C $maxClonality

	echo "Finished post-processing for: " $runIdentifier \
	\| tee -a ${logFile}

	date \| tee -a ${logFile}
No results found