Last active
October 2, 2015 15:21
-
-
Save crazyhottommy/08e554984ba0afd8118b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /bin/bash | |
### This script is used to filter the vcf file produced by lumpy/speedseq | |
set -e | |
set -u | |
set -o pipefail | |
function usage() { | |
echo " | |
Program: filter_vcf.sh | |
Version: 0.0.1 | |
Author: Ming Tang ([email protected]) | |
usage: filter_vcf.sh [options] <vcf.gz> | |
example: filter_vcf.sh -T 2 -N 2 my.vcf.gz | |
options: -h show this message | |
options: -T minmal Pieces of evidences Support the variants for tumor sample | |
options: -N maximal Pieces of evidences Support the variants for normal sample | |
" | |
} | |
while getopts ":hT:N:" OPTION | |
do | |
case "$OPTION" in | |
h) usage | |
exit 1 | |
;; | |
T) Tumor_SU="${OPTARG}" | |
;; | |
N) Normal_SU="${OPTARG}" | |
;; | |
*) usage | |
exit 1 | |
;; | |
esac | |
done | |
## now parsing the positional parameters | |
shift $[ $OPTIND -1 ] | |
## set vcf to $1, default to empty string if $1 is not supplied | |
## see here http://redsymbol.net/articles/unofficial-bash-strict-mode/ | |
vcf="${1:-}" | |
# check if $vcf is a string with length 0 | |
if [ -z $vcf ] | |
then | |
echo "error! please provide a gzipped vcf file!" | |
usage | |
exit 1 | |
fi | |
##get the header for the vcf which contains the sample names in column 10 and 11 | |
Normal=$(zless $vcf | grep "^#CHROM" | cut -f10) | |
Primary=$(zless $vcf | grep "^#CHROM" | cut -f11) | |
## if there is no recurrent sample, Recurrent will be evaluated to empty string | |
Recurrent=$(zless $vcf | grep "^#CHROM" | cut -f12) | |
## one needs to escape the quotes and special character $ | |
## vawk filter sv present only in tumor (somatic) | |
# cmd=$(printf "vawk '(S\$%s\$GT==\"0/1\" || S\$%s\$GT==\"1/1\") && S\$%s\$GT==\"0/0\" ' " "$Primary" "$Primary" "$Normal") | |
# echo "zless $vcf | $cmd" | sh | |
# can be further filtered with SU <1 in normal and SU >2 in tumor | |
cmd2=$(printf "vawk '(S\$%s\$GT==\"0/1\" || S\$%s\$GT==\"1/1\") && S\$%s\$GT==\"0/0\" && \ | |
S\$%s\$SU <= "${Normal_SU}" && S\$%s\$SU >= "${Tumor_SU}" ' " "$Primary" "$Primary" "$Normal" "$Normal" "$Primary") | |
echo "zless $vcf | $cmd2" | sh |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment