Skip to content

Instantly share code, notes, and snippets.

@IsmailM
Last active March 14, 2017 19:41
Show Gist options
  • Save IsmailM/fc38a86ebff4ec396a8c1e36d97106a6 to your computer and use it in GitHub Desktop.
Save IsmailM/fc38a86ebff4ec396a8c1e36d97106a6 to your computer and use it in GitHub Desktop.
# Annotate a VCF with Ensembl VEP (GRCh37, offline cache version 87) and
# derive a second file holding only the records that carry no dbSNP, Kaviar
# or gnomAD custom annotation.
#
# Usage:   <this script> INPUT.vcf.gz
# Outputs (written next to the resolved input file):
#   <sample>.VEP.json        VEP output, as produced by --json
#   <sample>.VEP.novel.json  JSON array of the "novel" records
# The VEP command line is printed to stdout before it is executed.

# DIRECTORIES
VEP=$HOME/data/.progz/ensembl-vep/vep.pl
ANALYSIS_DIR=$HOME/data/analysis
### Reference FASTA handed to VEP via --fasta
ASSEMBLY_FASTA=${ANALYSIS_DIR}/reference/human_g1k_v37_decoy.fasta
### VEP Custom Annotation
DBSNP_DB=${ANALYSIS_DIR}/dbsnp/All_20161121.vcf.gz
KAVIAR_DB=${ANALYSIS_DIR}/kaviar/VEP_annotation.vcf.gz
GNOMAD_DIR=${ANALYSIS_DIR}/gnomad
### VEP PLUGINs
ExAC_ANOT=${ANALYSIS_DIR}/exac/ExAC.r0.3.1.sites.vep.vcf.gz
CONDEL_CONFIG=$HOME/.vep/Plugins/config/Condel/config
THREADS=80

#######################################
# Derive every input/output path from the VCF given on the command line.
# Globals:   INPUT_FILE, VEP_INPUT, RUN_DIR, SAMPLE, VEP_OUTPUT,
#            NOVEL_OUTPUT (all written)
# Arguments: $1 - path to the input .vcf.gz
# Returns:   0 on success, non-zero if the file is unreadable or cannot be
#            resolved
#######################################
resolve_input_paths() {
  INPUT_FILE=$1
  if [[ ! -f "${INPUT_FILE}" || ! -r "${INPUT_FILE}" ]]; then
    printf 'error: cannot read input file: %s\n' "${INPUT_FILE}" >&2
    return 1
  fi
  VEP_INPUT=$(realpath -- "${INPUT_FILE}") || return
  RUN_DIR=$(dirname -- "${VEP_INPUT}")
  # The sample name comes from the path as given, not from the resolved path.
  SAMPLE=$(basename -- "${INPUT_FILE}" .vcf.gz)
  VEP_OUTPUT=${RUN_DIR}/${SAMPLE}.VEP.json
  NOVEL_OUTPUT=${RUN_DIR}/${SAMPLE}.VEP.novel.json
}

#######################################
# Collect the --custom options as an array so each path stays one word.
# Comment out lines depending on which custom cache to use.
# Globals:   DBSNP_DB, KAVIAR_DB, GNOMAD_DIR (read); ANNOTATION_ARGS (written)
#######################################
build_annotation_args() {
  local shard
  local genomes=${GNOMAD_DIR}/gnomad.genomes.r2.0.1.sites

  ANNOTATION_ARGS=()
  ANNOTATION_ARGS+=(--custom "${DBSNP_DB},dbsnp,vcf,exact")
  ANNOTATION_ARGS+=(--custom "${KAVIAR_DB},kaviar,vcf,exact")
  ANNOTATION_ARGS+=(--custom "${GNOMAD_DIR}/gnomad.exomes.r2.0.1.sites.vcf.gz,gnomad_exomes,vcf,exact")
  for shard in {1..22}; do
    ANNOTATION_ARGS+=(--custom "${genomes}.${shard}.vcf.gz,gnomad_${shard},vcf,exact")
  done
  ANNOTATION_ARGS+=(--custom "${genomes}.coding.autosomes.vcf.gz,gnomad_coding_autosomes,vcf,exact")
  ANNOTATION_ARGS+=(--custom "${genomes}.coding.X.vcf.gz,gnomad_coding_X,vcf,exact")
  ANNOTATION_ARGS+=(--custom "${genomes}.X.vcf.gz,gnomad_X,vcf,exact")
}

#######################################
# Assemble the VEP invocation once, so the echoed and the executed command
# cannot drift apart.
# Globals:   VEP, ASSEMBLY_FASTA, VEP_INPUT, VEP_OUTPUT, ANNOTATION_ARGS,
#            THREADS, ExAC_ANOT, CONDEL_CONFIG (read); VEP_CMD (written)
#######################################
build_vep_command() {
  VEP_CMD=(
    perl "${VEP}"
    --verbose
    --ASSEMBLY GRCh37
    --fasta "${ASSEMBLY_FASTA}"
    --input_file "${VEP_INPUT}"
    --output_file "${VEP_OUTPUT}"
    "${ANNOTATION_ARGS[@]}"
    --force_overwrite --json
    --fork "${THREADS}"
    --offline --cache --cache_version 87
    --check_existing --af --af_1kg
    --sift b --polyphen b
    --gene_phenotype --regulatory --symbol --canonical --biotype --pubmed --per_gene
    --plugin "ExAC,${ExAC_ANOT}"
    --plugin "Condel,${CONDEL_CONFIG},b"
    --plugin Carol
  )
}

#######################################
# Keep the lines that mention none of dbsnp / kaviar / gnomad and wrap them
# in a JSON array: "[", the kept lines joined by trailing commas, "]".
# Assumes the input holds one JSON record per line (this is a plain text
# match on each line, not a JSON-aware filter).
# Arguments: $1 - VEP JSON output to read
#            $2 - file to write the array to
# Returns:   non-zero if $1 is unreadable or awk fails
#######################################
write_novel_json() {
  local vep_json=$1
  local novel_json=$2

  if [[ ! -r "${vep_json}" ]]; then
    printf 'error: VEP output not found: %s\n' "${vep_json}" >&2
    return 1
  fi
  # Input is redirected rather than passed as an operand so that a path
  # containing "=" is not mistaken by awk for a variable assignment.
  awk '
    BEGIN { print "[" }
    /dbsnp|kaviar|gnomad/ { next }
    seen { print held "," }
    { held = $0; seen = 1 }
    END { if (seen) print held; print "]" }
  ' < "${vep_json}" > "${novel_json}"
}

#######################################
# Entry point: validate the argument, run VEP, then extract novel records.
# Arguments: $1 - path to the input .vcf.gz
# Returns:   2 on usage error, 1 on any processing failure, 0 otherwise
#######################################
main() {
  if (( $# < 1 )) || [[ -z "$1" ]]; then
    printf 'Usage: %s INPUT.vcf.gz\n' "${0##*/}" >&2
    return 2
  fi

  resolve_input_paths "$1" || return 1
  build_annotation_args
  build_vep_command

  # Log the exact command that is about to run.
  printf '%s\n' "${VEP_CMD[*]}"
  if ! "${VEP_CMD[@]}"; then
    # Do not filter a missing or stale output file after a failed run.
    printf 'error: VEP failed; %s was not written\n' "${NOVEL_OUTPUT}" >&2
    return 1
  fi

  write_novel_json "${VEP_OUTPUT}" "${NOVEL_OUTPUT}" || return 1

  # Convert JSON back into VCF
  # novel_output_VCF=${RUN_DIR}/${SAMPLE}.VEP.kaviar.dbsnp.novel.vcf
  # jq -r '.input' ${NOVEL_OUTPUT} > ${novel_output_VCF}
}

main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment