Skip to content

Instantly share code, notes, and snippets.

@danielecook
Last active February 26, 2020 16:54
Show Gist options
  • Save danielecook/59ccc34c92c8172d4a05261108758c77 to your computer and use it in GitHub Desktop.
Save danielecook/59ccc34c92c8172d4a05261108758c77 to your computer and use it in GitHub Desktop.
fix bam merge
# List at most 100 paths with a .bam extension, filtered by fd's
# --change-older-than cutoff of 2020-02-26 00:00:00.
# NOTE(review): presumably this selects the BAMs last modified before that
# date as candidates for fixing — confirm against the fd documentation.
fd --extension bam --change-older-than "2020-02-26 00:00:00" | head -n 100
#!/usr/bin/bash
# Author: Daniel E. Cook
# This script fixes the IDs in a BAM that were altered by a merge.
# The BAM to fix is taken from the first positional argument.
#
# Slurm directives: keep these ahead of the first executable command.
#SBATCH --job-name unmerge_bams
#SBATCH --part=cpu
#SBATCH --time=2:00:00
#SBATCH --cpus-per-task=6
#SBATCH --mem=4G
#SBATCH -o /camp/project/proj-tracerx-lung/tracerx/_PIPELINE/TRACERx-assets/logs/%j.out
#SBATCH -e /camp/project/proj-tracerx-lung/tracerx/_PIPELINE/TRACERx-assets/logs/%j.err

# -e: stop at the first failing command.
# -u: treat an unset variable (e.g. a missing argument) as an error.
# pipefail: a pipeline fails when ANY stage fails, not just the last one;
#           without it a failing `samtools view` feeding grep/sed is ignored.
set -euo pipefail

# Value passed as --threads to each samtools view call in fix_merged_bam.
# NOTE(review): presumably 3 concurrent samtools processes x 2 threads is
# meant to match the 6 CPUs requested above — confirm.
CORES=2
#######################################
# Fix issue associated with bams that were merged without using the
# -p and -c flags: re-emit the BAM with the merge suffixes removed.
#
# Header: drops every line whose ID: field is a bwa / MarkDuplicates /
# GATK IndelRealigner ID carrying a "-SUFFIX", or an ID of the form
# "...--<12 alnum>-<digit>-SUFFIX".
# Records: strips the same "-SUFFIX" from the first matching RG value and
# from the first "MarkDuplicates-SUFFIX" on each line.
#
# Globals:
#   CORES (read) - value for samtools --threads; falls back to 1 if unset.
# Arguments:
#   $1 - path to the input BAM
# Outputs:
#   The rewritten BAM on stdout.
# Returns:
#   0 on success; non-zero if any stage of either pipeline fails.
#######################################
fix_merged_bam() {
  local bam=$1
  local threads=${CORES:-1}

  # Subshell so that pipefail does not leak into the caller's shell.
  (
    # Without pipefail only the final `samtools view -hb` status is seen, so
    # a read error on the input would still yield a well-formed but
    # truncated BAM.
    set -o pipefail
    {
      # Output clean header. An empty result (grep exit 1) is treated as a
      # failure as well: no header lines survived, so there is nothing valid
      # to prepend to the records.
      samtools view --threads "${threads}" -H "${bam}" \
        | grep -E -v 'ID:(bwa|MarkDuplicates|GATK IndelRealigner)(\.[0-9])?-[A-Z0-9]+' \
        | grep -E -v 'ID:.*--[A-Za-z0-9]{12}-[0-9]{1}-[A-Z0-9]+' \
        || exit 1

      # Output records with the merge suffixes stripped.
      samtools view --threads "${threads}" "${bam}" \
        | sed -r 's/(RG.*--[A-Za-z0-9]{12}-[0-9]{1})-[A-Z0-9]+/\1/' \
        | sed -r 's/(MarkDuplicates)-[A-Z0-9]+/\1/'
    } | samtools view --threads "${threads}" -hb
  )
}
#######################################
# Fix bam: write a corrected copy of a merged BAM next to the original,
# then run samtools quickcheck on the copy and index it.
# Arguments:
#   $1 - path to the BAM file to fix
# Outputs:
#   Writes "<input with a trailing .bam removed>.fix_merge.bam" and asks
#   samtools to index it; progress messages go to stderr.
# Returns:
#   0 on success, 2 if no path was given, 1 if any step fails.
#######################################
fix_bam() {
  local src=${1:-}
  local temp_name
  local -r index_threads=6

  if [[ -z "${src}" ]]; then
    echo "fix_bam: missing path to the BAM file to fix" >&2
    return 2
  fi

  # Strip only a trailing ".bam". Substituting the first ".bam" anywhere in
  # the path mangles directories such as "run.bam.d/", and an input without
  # ".bam" would make the output path equal the input, truncating the source
  # as soon as the redirect below opens it.
  temp_name="${src%.bam}.fix_merge.bam"

  echo "fixing ${src}; Writing to ${temp_name}" >&2
  if ! fix_merged_bam "${src}" > "${temp_name}"; then
    # Do not leave a partial file that could be mistaken for a good BAM.
    rm -f -- "${temp_name}"
    return 1
  fi

  echo "fixing ${src}; Performing a quickcheck" >&2
  if ! samtools quickcheck "${temp_name}"; then
    rm -f -- "${temp_name}"
    return 1
  fi

  echo "Indexing ${temp_name}" >&2
  # samtools index takes its thread count via "-@"; "--@" is not an option.
  samtools index -@ "${index_threads}" "${temp_name}" || return 1

  echo "Moving: ${temp_name}" >&2
  # NOTE(review): the in-place replacement below is disabled, so the original
  # BAM is left untouched even though the messages say otherwise — confirm
  # whether these lines should be re-enabled.
  #mv -- "${temp_name}" "${src}"
  #mv -- "${temp_name}.bai" "${src}.bai"
  echo "Done: ${src} Replaced" >&2
}
# Entry point: the first positional argument is the BAM to fix. Quoted so a
# path containing spaces stays one argument; aborts with a message if absent.
fix_bam "${1:?missing argument: path to the BAM file to fix}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment