Skip to content

Instantly share code, notes, and snippets.

@mojaveazure
Last active February 5, 2016 00:11
Show Gist options
  • Save mojaveazure/aaef3fba523129ec0ca1 to your computer and use it in GitHub Desktop.
Save mojaveazure/aaef3fba523129ec0ca1 to your computer and use it in GitHub Desktop.
Perform a pileup using SAMTools and BCFTools
#!/bin/bash
set -e
set -o pipefail
# Check for dependent programs
if ! `command -v samtools > /dev/null 2> /dev/null`; then echo "Please install SAMTools and place in your PATH"; exit 1; fi # Do we have SAMTools installed?
if ! `command -v bcftools > /dev/null 2> /dev/null`; then echo "Please install BCFTools and place in your PATH"; exit 1; fi # Do we have BCFTools installed?
# Create a usage message
function Usage() {
echo -e "\
`basename $0`: sample_list ref_gen [out_file] [bed_file] \n\
where: sample_list is a list of BAM files \n\
ref_gen is a reference FASTA file \n\
[outfile] is the name for the final BCF file \n\
defaults to PILEUP_`date +"%y_%m_%d.%H.%M"`.bcf \n\
[region] is an optional BED file for the pileup \n\
" >&2
exit 1
}
# Export the function to be used later
export -f Usage
# Check for our required arguments
if [[ "$#" < 2 ]]; then Usage; fi
BAM_LIST=$1 # Where is our list of BAM files?
REF_GEN=$2 # Where is our reference genome?
OUT_FILE=${3:-PILEUP_`date +"%y_%m_%d.%H.%M"`.vcf} # What should we call our output BCF file?
BED_FILE=$4 # Do we have an optional BED file?
# Check to make sure that our arguments are real
if ! [[ -f "${BAM_LIST}" ]]; then echo "Failed to find ${BAM_LIST}!" >&2; exit 1; fi # Does our list of BAM files exist?
if ! [[ -f "${REF_GEN}" ]]; then echo "Failed to find ${REF_GEN}!" >&2; exit 1; fi # Does our reference FASTA file exist?
if ! [[ -z "${BED_FILE}" ]] && ! [[ -f "${BED_FILE}" ]]; then echo "Failed to find ${BED_FILE}!" >&2; exit 1; fi # Does the BED file, if specified, exist?
if [[ -f "${OUT_FILE}" ]]; then echo "${OUT_FILE} exists, overwriting..." >&2; fi # Are we overwriting?
# SAMTools mpileup
# -b List of BAM files
# -f Reference FASTA file
# -l Use positions in BED file
# -u Generate uncompressed BCF file
#
# BCFTools view
# -b Output BCF
# -e Likelihood based analyses
# -c SNP calling
# -g Call genotypes at variant sites
# -v Output potential variant sites only
# Check to see if we have an indexed reference FASTA file
function indexReference() {
local reference="$1" # What is our reference?
if ! [[ -f "${reference}".fai ]]; then samtools faidx "${reference}"; fi # If we don't have the FAI file for the reference, make one
return 0 # Finish this function
}
function checkOutName() {
local outName="$1"
local extension="`echo ${outName} | rev | cut -f 1 -d '.' | rev`"
if [[ "${extension}" -eq "bcf" ]]
then
echo "${outName}"
return 0
else
echo "${outName}.bcf"
return 5
fi
}
# A function to run the multi-way pileup
function pileup() {
local sampleList="$1" # Get the list of BAM files
local reference="$2" # Get the reference FASTA file
local output="$3" # Get the name of our output file
local bedFile="$4" # Get the BED file
indexReference "${reference}" # Check for a FAI file for our reference FASTA file
local output="`checkOutName ${output}`" # Make sure we have a valid output file name
if [[ -z "${bedFile}" ]] # Are we missing a BED file?
then # If yes,
samtools mpileup -u -b "${sampleList}" -f "${reference}" | bcftools view -bvcg - > "${output}" # Don't include it in our command for SAMTools
else # If we have it
samtools mpileup -u -b "${sampleList}" -l "${bedFile}" -f "${reference}" | bcftools view -bvcg - > "${output}" # Include it in our command for SAMTools
fi
echo "Final output file can be found at ${output}" >&2
}
pileup ${BAM_LIST} ${REF_GEN} ${OUT_FILE} ${BED_FILE}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment