Created
November 25, 2015 15:04
-
-
Save explodecomputer/1cb83742d6fc4372eb4d to your computer and use it in GitHub Desktop.
extract SNPs and IDs from bgen files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
while [[ $# > 1 ]] | |
do | |
key="$1" | |
case $key in | |
-g|--rootname) | |
genort="${2}" | |
shift | |
;; | |
-s|--sample) | |
samplefile="${2}" | |
shift | |
;; | |
-x|--extract) | |
snplistfile1="${2}" | |
shift | |
;; | |
-e|--exclude) | |
snplistfile2="${2}" | |
shift | |
;; | |
-r|--remove) | |
idfile2="${2}" | |
shift | |
;; | |
-o|--out) | |
outfile="${2}" | |
shift | |
;; | |
-h|--help) | |
showhelp="yes" | |
shift | |
;; | |
--default) | |
DEFAULT=YES | |
shift | |
;; | |
*) | |
# unknown option | |
;; | |
esac | |
shift | |
done | |
module add apps/qctool-1.3 | |
function instructions { | |
echo "" | |
echo "This script uses qctool (required in your path) to extract SNPs and/or individuals" | |
echo "from impute2 dosage data that has been split into 23 chromosomes" | |
echo "" | |
echo "--rootname [argument] Impute2 output rootname. e.g. if the data is located at" | |
echo " chr01.gz chr02.gz chr03.gz ... etc" | |
echo " then use: [email protected]" | |
echo " where the @ symbol represents the chromosome number" | |
echo "--sample [argument] Impute2 sample file" | |
echo "--extract [argument] File containing list of SNPs to keep" | |
echo "--exclude [argument] File containing list of SNPs to exclude" | |
echo "--remove [argument] File containing list of IDs to exclude" | |
echo "--out [argument] Output filename" | |
exit | |
} | |
if [ ! -z "${showhelp}" ]; then | |
instructions | |
fi | |
if [[ -n $1 ]]; then | |
echo "Unrecognised argument: ${1}" | |
instructions | |
exit 1 | |
fi | |
echo "" | |
echo "Impute2 root name = ${genort}" | |
echo "Sample file = ${samplefile}" | |
echo "Output files = ${outfile}" | |
cmdbase="qctool -s ${samplefile}" | |
if [ ! -z "${snplistfile1}" ]; then | |
cmdbase="${cmdbase} -incl-rsids ${snplistfile1}" | |
echo "SNPs to keep = ${snplistfile1}" | |
fi | |
if [ ! -z "${snplistfile2}" ]; then | |
cmdbase="${cmdbase} -excl-rsids ${snplistfile2}" | |
echo "SNPs to exclude = ${snplistfile2}" | |
fi | |
if [ ! -z "${idfile2}" ]; then | |
cmdbase="${cmdbase} --excl-samples ${idfile2}" | |
echo "IDs to remove = ${idfile1}" | |
fi | |
echo "" | |
mg="" | |
ms="" | |
for x in {19..22} | |
do | |
i=`printf "%0*d" 2 ${x}` | |
filename=$(sed -e "s/@/$i/g" <<< ${genort}) | |
if [ ! -f "${filename}" ]; then | |
echo "The file '${filename}' does not exist" | |
instructions | |
fi | |
cmd="${cmdbase} -g ${filename} -os ${outfile}_${i}.sample -og ${outfile}_${i}.bgen" | |
${cmd} | |
og="${outfile}_${i}.bgen" | |
os="${outfile}_${i}.sample" | |
mg="${mg} -g ${og} -s ${os}" | |
done | |
printf "\nMerging...\n" | |
outfileg="${outfile}.bgen" | |
outfiles="${outfile}.sample" | |
# This bit doesn't work | |
# qctool ${mg} -og ${outfile}.bgen -os ${outfile}.sample | |
# rm ${outfile}_* | |
echo "Done" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment