Skip to content

Instantly share code, notes, and snippets.

@crazyhottommy
Last active October 2, 2015 15:21
Show Gist options
  • Save crazyhottommy/08e554984ba0afd8118b to your computer and use it in GitHub Desktop.
Save crazyhottommy/08e554984ba0afd8118b to your computer and use it in GitHub Desktop.
#! /bin/bash
### This script is used to filter the vcf file produced by lumpy/speedseq
set -e
set -u
set -o pipefail
function usage() {
echo "
Program: filter_vcf.sh
Version: 0.0.1
Author: Ming Tang ([email protected])
usage: filter_vcf.sh [options] <vcf.gz>
example: filter_vcf.sh -T 2 -N 2 my.vcf.gz
options: -h show this message
options: -T minmal Pieces of evidences Support the variants for tumor sample
options: -N maximal Pieces of evidences Support the variants for normal sample
"
}
while getopts ":hT:N:" OPTION
do
case "$OPTION" in
h) usage
exit 1
;;
T) Tumor_SU="${OPTARG}"
;;
N) Normal_SU="${OPTARG}"
;;
*) usage
exit 1
;;
esac
done
## now parsing the positional parameters
shift $[ $OPTIND -1 ]
## set vcf to $1, default to empty string if $1 is not supplied
## see here http://redsymbol.net/articles/unofficial-bash-strict-mode/
vcf="${1:-}"
# check if $vcf is a string with length 0
if [ -z $vcf ]
then
echo "error! please provide a gzipped vcf file!"
usage
exit 1
fi
##get the header for the vcf which contains the sample names in column 10 and 11
Normal=$(zless $vcf | grep "^#CHROM" | cut -f10)
Primary=$(zless $vcf | grep "^#CHROM" | cut -f11)
## if there is no recurrent sample, Recurrent will be evaluated to empty string
Recurrent=$(zless $vcf | grep "^#CHROM" | cut -f12)
## one needs to escape the quotes and special character $
## vawk filter sv present only in tumor (somatic)
# cmd=$(printf "vawk '(S\$%s\$GT==\"0/1\" || S\$%s\$GT==\"1/1\") && S\$%s\$GT==\"0/0\" ' " "$Primary" "$Primary" "$Normal")
# echo "zless $vcf | $cmd" | sh
# can be further filtered with SU <1 in normal and SU >2 in tumor
cmd2=$(printf "vawk '(S\$%s\$GT==\"0/1\" || S\$%s\$GT==\"1/1\") && S\$%s\$GT==\"0/0\" && \
S\$%s\$SU <= "${Normal_SU}" && S\$%s\$SU >= "${Tumor_SU}" ' " "$Primary" "$Primary" "$Normal" "$Normal" "$Primary")
echo "zless $vcf | $cmd2" | sh
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment