Skip to content

Instantly share code, notes, and snippets.

@explodecomputer
Created November 25, 2015 15:04
Show Gist options
  • Save explodecomputer/1cb83742d6fc4372eb4d to your computer and use it in GitHub Desktop.
Save explodecomputer/1cb83742d6fc4372eb4d to your computer and use it in GitHub Desktop.
extract SNPs and IDs from bgen files
#!/bin/bash
while [[ $# > 1 ]]
do
key="$1"
case $key in
-g|--rootname)
genort="${2}"
shift
;;
-s|--sample)
samplefile="${2}"
shift
;;
-x|--extract)
snplistfile1="${2}"
shift
;;
-e|--exclude)
snplistfile2="${2}"
shift
;;
-r|--remove)
idfile2="${2}"
shift
;;
-o|--out)
outfile="${2}"
shift
;;
-h|--help)
showhelp="yes"
shift
;;
--default)
DEFAULT=YES
shift
;;
*)
# unknown option
;;
esac
shift
done
module add apps/qctool-1.3
function instructions {
echo ""
echo "This script uses qctool (required in your path) to extract SNPs and/or individuals"
echo "from impute2 dosage data that has been split into 23 chromosomes"
echo ""
echo "--rootname [argument] Impute2 output rootname. e.g. if the data is located at"
echo " chr01.gz chr02.gz chr03.gz ... etc"
echo " then use: [email protected]"
echo " where the @ symbol represents the chromosome number"
echo "--sample [argument] Impute2 sample file"
echo "--extract [argument] File containing list of SNPs to keep"
echo "--exclude [argument] File containing list of SNPs to exclude"
echo "--remove [argument] File containing list of IDs to exclude"
echo "--out [argument] Output filename"
exit
}
if [ ! -z "${showhelp}" ]; then
instructions
fi
if [[ -n $1 ]]; then
echo "Unrecognised argument: ${1}"
instructions
exit 1
fi
echo ""
echo "Impute2 root name = ${genort}"
echo "Sample file = ${samplefile}"
echo "Output files = ${outfile}"
cmdbase="qctool -s ${samplefile}"
if [ ! -z "${snplistfile1}" ]; then
cmdbase="${cmdbase} -incl-rsids ${snplistfile1}"
echo "SNPs to keep = ${snplistfile1}"
fi
if [ ! -z "${snplistfile2}" ]; then
cmdbase="${cmdbase} -excl-rsids ${snplistfile2}"
echo "SNPs to exclude = ${snplistfile2}"
fi
if [ ! -z "${idfile2}" ]; then
cmdbase="${cmdbase} --excl-samples ${idfile2}"
echo "IDs to remove = ${idfile1}"
fi
echo ""
mg=""
ms=""
for x in {19..22}
do
i=`printf "%0*d" 2 ${x}`
filename=$(sed -e "s/@/$i/g" <<< ${genort})
if [ ! -f "${filename}" ]; then
echo "The file '${filename}' does not exist"
instructions
fi
cmd="${cmdbase} -g ${filename} -os ${outfile}_${i}.sample -og ${outfile}_${i}.bgen"
${cmd}
og="${outfile}_${i}.bgen"
os="${outfile}_${i}.sample"
mg="${mg} -g ${og} -s ${os}"
done
printf "\nMerging...\n"
outfileg="${outfile}.bgen"
outfiles="${outfile}.sample"
# This bit doesn't work
# qctool ${mg} -og ${outfile}.bgen -os ${outfile}.sample
# rm ${outfile}_*
echo "Done"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment