Last active
February 26, 2020 16:54
-
-
Save danielecook/59ccc34c92c8172d4a05261108758c77 to your computer and use it in GitHub Desktop.
fix bam merge
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
fd --extension bam --change-older-than "2020-02-26 00:00:00" | head -n 100 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/bash
# Author: Daniel E. Cook
# This script will fix merged IDs in a BAM file.
# The BAM to fix is taken from the first positional argument.
#SBATCH --job-name unmerge_bams
#SBATCH --part=cpu
#SBATCH --time=2:00:00
#SBATCH --cpus-per-task=6
#SBATCH --mem=4G
#SBATCH -o /camp/project/proj-tracerx-lung/tracerx/_PIPELINE/TRACERx-assets/logs/%j.out
#SBATCH -e /camp/project/proj-tracerx-lung/tracerx/_PIPELINE/TRACERx-assets/logs/%j.err

# -e: stop on the first failing command.
# -u: treat use of an unset variable as an error.
# -o pipefail: a pipeline fails when ANY stage fails, not just the last one;
#   without it a crashed `samtools view` feeding grep/sed would go unnoticed.
set -euo pipefail

# Value handed to `samtools view --threads` by fix_merged_bam.
CORES=2
#######################################
# Fix issue associated with bams that were merged without using the -p and -c
# flags: write a cleaned copy of the BAM to stdout.
#
# Header:  every header line whose ID ends in a "-SUFFIX" (bwa,
#          MarkDuplicates, GATK IndelRealigner entries, and IDs of the form
#          "...--<12 alnum>-<digit>-SUFFIX") is dropped.
# Records: the same "-SUFFIX" is stripped from the matching RG value and from
#          "MarkDuplicates-SUFFIX" on every alignment line.
#
# Globals:   CORES (read) - passed to `samtools view --threads`; 1 if unset.
# Arguments: $1 - input BAM.
# Outputs:   cleaned BAM on stdout; diagnostics on stderr.
# Returns:   0 on success, 2 on a missing argument, otherwise the status of
#            the failing pipeline stage. Note that grep exits 1 when no header
#            line survives the filter, which is reported as a failure.
#######################################
fix_merged_bam() {
  if [[ -z "${1:-}" ]]; then
    >&2 echo "usage: fix_merged_bam <file.bam>"
    return 2
  fi
  local -r threads="${CORES:-1}"

  # Run in a subshell so that pipefail is scoped to this function only.
  (
    set -o pipefail
    {
      # Output clean header. Abort before emitting any record if this fails,
      # so that a header-less stream is never passed on as if it were valid.
      samtools view --threads "${threads}" -H "${1}" \
        | grep -E -v \
          -e 'ID:(bwa|MarkDuplicates|GATK IndelRealigner)(\.[0-9])?-[A-Z0-9]+' \
          -e 'ID:.*--[A-Za-z0-9]{12}-[0-9]{1}-[A-Z0-9]+' \
        || exit

      # Output records with the merge suffixes removed. The two expressions
      # are applied in order to each line.
      samtools view --threads "${threads}" "${1}" \
        | sed -E \
          -e 's/(RG.*--[A-Za-z0-9]{12}-[0-9]{1})-[A-Z0-9]+/\1/' \
          -e 's/(MarkDuplicates)-[A-Z0-9]+/\1/'
    } | samtools view --threads "${threads}" -hb -
  )
}
#######################################
# Fix bam: write a cleaned copy of a BAM next to the input, validate it with
# `samtools quickcheck`, and index it.
#
# The output path is the input with a trailing ".bam" replaced by
# ".fix_merge.bam"; for inputs that do not end in ".bam" the suffix is simply
# appended, so the output can never be the input file itself.
#
# The step that would move the fixed file over the original is commented out
# below, so the original BAM is left untouched.
#
# Arguments: $1 - input BAM.
# Outputs:   progress messages on stderr; <input>.fix_merge.bam and its index
#            on disk.
# Returns:   0 on success, 2 on a missing argument, otherwise the status of
#            the step that failed.
#######################################
fix_bam() {
  if [[ -z "${1:-}" ]]; then
    >&2 echo "usage: fix_bam <file.bam>"
    return 2
  fi
  local -r src="${1}"
  local -r fixed="${src%.bam}.fix_merge.bam"
  local rc=0

  >&2 echo "fixing ${src}; Writing to ${fixed}"
  fix_merged_bam "${src}" > "${fixed}" || rc=$?
  if ((rc != 0)); then
    # Do not leave a truncated BAM behind that could be mistaken for a result.
    >&2 echo "fixing ${src}; failed (status ${rc}), removing ${fixed}"
    rm -f -- "${fixed}"
    return "${rc}"
  fi

  >&2 echo "fixing ${src}; Performing a quickcheck"
  samtools quickcheck "${fixed}" || return

  >&2 echo "Indexing ${fixed}"
  # 6 threads, the same number as the job's --cpus-per-task request.
  samtools index -@ 6 "${fixed}" || return

  # Replacing the original is currently disabled:
  # >&2 echo "Moving: ${fixed}"
  # mv -- "${fixed}" "${src}"
  # mv -- "${fixed}.bai" "${src}.bai"
  >&2 echo "Done: ${src}; fixed copy written to ${fixed}"
}
# Fix the BAM named by the first positional argument; abort with a usage
# message when it is missing or empty.
fix_bam "${1:?usage: $0 <file.bam>}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment