Last active
October 25, 2023 16:15
-
-
Save SamStudio8/aabb6cdb3ea2a82974084c5411ca0d37 to your computer and use it in GitHub Desktop.
Safely subset a BAM or CRAM with samtools+picard
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
# Name subset_xam | |
# Description Safely subset a BAM (or CRAM) with samtools+picard | |
# Author @samstudio8 | |
# Date 2022-11-01 | |
# Version 1.0.3 | |
PICARD_JAR=${PICARD_JAR:-} | |
xam=$1 | |
ref=$2 | |
seq=$3 | |
if [ -z "$xam" ] || [ -z "$ref" ] || [ -z "$seq" ]; then | |
echo "Usage: `basename $0` <xam> <ref> <seq>" | |
echo "Missing one of: <xam> <ref> <seq>" | |
exit 64 | |
fi | |
set -euo pipefail | |
command -v samtools >/dev/null 2>&1 || { echo "Cowardly refusing to continue as samtools could not be found."; exit 66; } | |
command -v java >/dev/null 2>&1 || { echo "Cowardly refusing to continue as java (required for picard) could not be found."; exit 66; } | |
tmp=`mktemp -d` | |
ret=$? | |
if [ $ret -ne 0 ]; then | |
echo "Failed to create temp dir!" | |
exit $ret | |
fi | |
echo "Created temp dir $tmp" | |
if [ -z "$PICARD_JAR" ]; then | |
PICARD_JAR="$tmp/picard.jar" | |
echo "PICARD_JAR not set, downloading latest temporarily for you!" | |
echo "Giving you a chance to bail out in case you don't want this..." | |
sleep 3 | |
PICARD_TAG=$(curl -s https://api.github.com/repos/broadinstitute/picard/releases/latest | grep tag_name | cut -d : -f 2,3 | tr -d \\\",' ') | |
echo "Fetching Picard v${PICARD_TAG}" | |
curl -s -o $PICARD_JAR -LO https://github.com/broadinstitute/picard/releases/download/${PICARD_TAG##v}/picard.jar | |
fi | |
# append ref arg to avoid sequence cache miss on CRAM view | |
ref_args="" | |
if [[ "$xam" == *"cram"* ]]; then | |
ref_args="--reference $ref" | |
fi | |
xam_ext=${xam##*.} | |
new_xam=${xam%.*}.${seq}.${xam_ext} | |
echo "Filtering `basename $xam` to $seq" | |
samtools index $xam | |
samtools view -h $xam $seq -o $tmp/tmp.${xam_ext} --write-index $ref_args | |
samtools faidx $ref $seq > $tmp/seq.fa | |
echo "CreateSequenceDictionary" | |
java -jar $PICARD_JAR CreateSequenceDictionary \ | |
-R $tmp/seq.fa \ | |
-O $tmp/seq.dict | |
# remove UR tag from CRAM header as its useless in almost all cases | |
# but especially this one | |
sed -r $'s,[\t]UR:[^\t\\n]+,,' $tmp/seq.dict > $tmp/seq.nour.dict | |
cat -n $tmp/seq.nour.dict | |
echo "ReorderSam" | |
java -jar $PICARD_JAR ReorderSam \ | |
-I $tmp/tmp.${xam_ext} \ | |
-O ${new_xam} \ | |
-SD $tmp/seq.nour.dict \ | |
-R $ref \ | |
--ALLOW_INCOMPLETE_DICT_CONCORDANCE \ | |
--TMP_DIR $tmp \ | |
--VERBOSITY WARNING | |
samtools index $new_xam | |
# try and avoid blowing up user data | |
if [ -n "$tmp" ]; then | |
echo "Cleaning $tmp" | |
rm -rf $tmp | |
fi | |
echo "Subset ${xam_ext}: $new_xam" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment