#!/usr/bin/env bash
# Gist IsmailM/fc38a86ebff4ec396a8c1e36d97106a6 -- last active March 14, 2017 19:41.
# Hosting-page notice: this file may contain hidden or bidirectional Unicode
# text that is interpreted or compiled differently from how it is displayed;
# review it in an editor that reveals hidden Unicode characters.
# DIRECTORIES
# VEP entry-point script and the root directory holding all reference data.
VEP="${HOME}/data/.progz/ensembl-vep/vep.pl"
ANALYSIS_DIR="${HOME}/data/analysis"

### Reference genome FASTA (handed to VEP through --fasta)
ASSEMBLY_FASTA="${ANALYSIS_DIR}/reference/human_g1k_v37_decoy.fasta"

### VEP Custom Annotation
# Databases attached to VEP through --custom options.
DBSNP_DB="${ANALYSIS_DIR}/dbsnp/All_20161121.vcf.gz"
KAVIAR_DB="${ANALYSIS_DIR}/kaviar/VEP_annotation.vcf.gz"
GNOMAD_DIR="${ANALYSIS_DIR}/gnomad"

### VEP PLUGINs
# Data/config files passed as arguments to the ExAC and Condel plugins.
ExAC_ANOT="${ANALYSIS_DIR}/exac/ExAC.r0.3.1.sites.vep.vcf.gz"
CONDEL_CONFIG="${HOME}/.vep/Plugins/config/Condel/config"

# Value given to VEP's --fork option.
THREADS=80
# Positional argument 1: path to the input VCF (the sample name is derived by
# stripping a .vcf.gz suffix). Stop early with a usage message rather than
# letting realpath/dirname run on an empty string.
if [[ -z "${1:-}" ]]; then
  printf 'Usage: %s <input.vcf.gz>\n' "${0##*/}" >&2
  exit 2
fi
INPUT_FILE=$1 # First Argument
if [[ ! -f "${INPUT_FILE}" ]]; then
  printf 'error: input file not found: %s\n' "${INPUT_FILE}" >&2
  exit 1
fi

# Absolute path of the input; every output file is written next to it.
VEP_INPUT=$(realpath -- "${INPUT_FILE}") || exit
RUN_DIR=$(dirname -- "${VEP_INPUT}")
SAMPLE=$(basename -- "${INPUT_FILE}" .vcf.gz)

# Full VEP result, and the subset without dbsnp/kaviar/gnomad hits.
VEP_OUTPUT="${RUN_DIR}/${SAMPLE}.VEP.json"
NOVEL_OUTPUT="${RUN_DIR}/${SAMPLE}.VEP.novel.json"
# Comment out line depending on which custom cache to use
#
# ANNOTATION is a whitespace-separated list of VEP `--custom` options. It is
# meant to be expanded UNQUOTED so the shell splits it into separate
# arguments, which means none of the database paths may contain whitespace.
ANNOTATION=''
ANNOTATION="${ANNOTATION} --custom ${DBSNP_DB},dbsnp,vcf,exact"
ANNOTATION="${ANNOTATION} --custom ${KAVIAR_DB},kaviar,vcf,exact"

# gnomAD r2.0.1: exomes first, then one genomes file per autosome, then the
# coding subsets and chromosome X (same order as the original single line).
ANNOTATION="${ANNOTATION} --custom ${GNOMAD_DIR}/gnomad.exomes.r2.0.1.sites.vcf.gz,gnomad_exomes,vcf,exact"
for gnomad_chrom in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22; do
  ANNOTATION="${ANNOTATION} --custom ${GNOMAD_DIR}/gnomad.genomes.r2.0.1.sites.${gnomad_chrom}.vcf.gz,gnomad_${gnomad_chrom},vcf,exact"
done
unset gnomad_chrom
ANNOTATION="${ANNOTATION} --custom ${GNOMAD_DIR}/gnomad.genomes.r2.0.1.sites.coding.autosomes.vcf.gz,gnomad_coding_autosomes,vcf,exact"
ANNOTATION="${ANNOTATION} --custom ${GNOMAD_DIR}/gnomad.genomes.r2.0.1.sites.coding.X.vcf.gz,gnomad_coding_X,vcf,exact"
ANNOTATION="${ANNOTATION} --custom ${GNOMAD_DIR}/gnomad.genomes.r2.0.1.sites.X.vcf.gz,gnomad_X,vcf,exact"
# Build the VEP command line once (requires bash for the array) so that the
# logged command and the executed command can never drift apart.
# ${ANNOTATION} is a whitespace-separated option string and is split on
# purpose; everything else is quoted.
# shellcheck disable=SC2206
vep_cmd=(
  perl "${VEP}"
  --verbose
  --ASSEMBLY GRCh37
  --fasta "${ASSEMBLY_FASTA}"
  --input_file "${VEP_INPUT}"
  --output_file "${VEP_OUTPUT}"
  ${ANNOTATION}
  --force_overwrite
  --json
  --fork "${THREADS}"
  --offline
  --cache
  --cache_version 87
  --check_existing
  --af
  --af_1kg
  --sift b
  --polyphen b
  --gene_phenotype
  --regulatory
  --symbol
  --canonical
  --biotype
  --pubmed
  --per_gene
  --plugin "ExAC,${ExAC_ANOT}"
  --plugin "Condel,${CONDEL_CONFIG},b"
  --plugin Carol
)

# Log the exact command to stdout, then run it.
echo "${vep_cmd[@]}"

# Stop here when VEP fails so later steps never consume a missing, stale or
# truncated output file.
"${vep_cmd[@]}" || {
  vep_status=$?
  printf 'error: VEP exited with status %d; aborting\n' "${vep_status}" >&2
  exit "${vep_status}"
}
#######################################
# Copy every line of a VEP JSON output that mentions none of "dbsnp",
# "kaviar" or "gnomad" into a single JSON array: one record per line,
# comma-separated, wrapped in [ ].
# The match is a plain substring test on the raw line, so a record is dropped
# if any of those words occurs anywhere in it.
# Arguments: $1 - VEP output file (expected to hold one JSON object per line)
#            $2 - destination file (overwritten)
# Returns:   1 without touching $2 when $1 is not readable.
#######################################
write_novel_json() {
  local vep_json=$1
  local novel_json=$2

  if [[ ! -r "${vep_json}" ]]; then
    printf 'error: cannot read VEP output: %s\n' "${vep_json}" >&2
    return 1
  fi

  {
    echo '['
    # Append a comma to every kept line, then strip it from the last one.
    grep -v -e dbsnp -e kaviar -e gnomad -- "${vep_json}" \
      | sed -e 's|$|,|' -e '$s|,$||'
    echo ']'
  } > "${novel_json}"
}

write_novel_json "${VEP_OUTPUT}" "${NOVEL_OUTPUT}"
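# Convert JSON back into VCF (disabled). NOVEL_OUTPUT is a JSON array, so the
# jq filter has to iterate over its elements with `.[]`.
# novel_output_VCF=${RUN_DIR}/${SAMPLE}.VEP.kaviar.dbsnp.novel.vcf
# jq -r '.[].input' "${NOVEL_OUTPUT}" > "${novel_output_VCF}"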
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.