Skip to content

Instantly share code, notes, and snippets.

@fo40225
Last active February 27, 2025 06:18
Show Gist options
  • Save fo40225/f135b50b3e47d0997098264c3d28e590 to your computer and use it in GitHub Desktop.
Save fo40225/f135b50b3e47d0997098264c3d28e590 to your computer and use it in GitHub Desktop.
annovar + intervar
# Fetch the ANNOVAR release tarball, unpack it, and continue from inside it.
wget http://www.openbioinformatics.org/annovar/download/0wgxR2rIVP/annovar.latest.tar.gz
tar axvf annovar.latest.tar.gz
cd annovar
# Download the additional helper scripts, then mark both executable at once.
wget http://www.openbioinformatics.org/annovar/download/prepare_annovar_user.pl
wget -O index_annovar.pl https://github.com/WGLab/doc-ANNOVAR/files/6670482/index_annovar.txt
chmod a+x prepare_annovar_user.pl index_annovar.pl
# Download all hg19 databases into humandb/ (same databases, same order).
#
# Deliberately not downloaded for hg19:
#   dbnsfp41a  - broken
#   avsnp144   - broken, https://annovar.openbioinformatics.org/en/latest/articles/dbSNP/#additional-discussions
#   gnomad40_exome, gnomad40_genome, gnomad41_exome, gnomad41_genome
#              - The GnomAD4 database must be generated manually: run
#                `picard LiftoverVcf` on the GnomAD4 hg38 vcf to get an hg19 vcf,
#                then `bcftools query` to txt.
for db in \
    refGene refGeneWithVer ensGene knownGene \
    ljb26_all dbnsfp30a dbnsfp33a dbnsfp35a dbnsfp42a dbnsfp47a \
    dbscsnv11 cosmic70 \
    esp6500siv2_ea esp6500siv2_aa esp6500siv2_all \
    exac03 exac03nontcga exac03nonpsych \
    gene4denovo201907 \
    gnomad_exome gnomad_genome gnomad211_exome gnomad211_genome \
    kaviar_20150923 hrcr1 \
    1000g2014oct 1000g2015aug \
    avsnp142 avsnp147 avsnp150 avsnp151 \
    nci60 icgc28
do
    perl annotate_variation.pl --downdb --webfrom annovar --buildver hg19 "$db" humandb/
done
# ClinVar snapshots, oldest first.
for snapshot in \
    20140702 20140902 20150330 20150629 20151201 20160302 \
    20161128 20170130 20170501 20170905 20180603 20190305 \
    20200316 20210123 20210501 20220320 20221231 20240611
do
    perl annotate_variation.pl --downdb --webfrom annovar --buildver hg19 "clinvar_$snapshot" humandb/
done
perl annotate_variation.pl --downdb --webfrom annovar --buildver hg19 regsnpintron humandb/
# rmsk is the one table in this list fetched with --webfrom ucsc instead of annovar.
perl annotate_variation.pl --downdb --webfrom ucsc --buildver hg19 rmsk humandb/
# Download all hg38 databases into humandb/ (same databases, same order).
#
# Deliberately not downloaded: dbnsfp41a and avsnp144 (both are disabled for
# hg19 as broken and stay disabled here too).
for db in \
    refGene refGeneWithVer ensGene knownGene \
    ljb26_all dbnsfp30a dbnsfp33a dbnsfp35a dbnsfp42a dbnsfp47a \
    dbscsnv11 cosmic70 \
    esp6500siv2_ea esp6500siv2_aa esp6500siv2_all \
    exac03 exac03nontcga exac03nonpsych \
    gene4denovo201907 \
    gnomad_exome gnomad_genome gnomad211_exome gnomad211_genome \
    gnomad40_exome gnomad40_genome gnomad41_exome gnomad41_genome \
    kaviar_20150923 hrcr1 \
    1000g2014oct 1000g2015aug \
    avsnp142 avsnp147 avsnp150 avsnp151 \
    nci60 icgc28
do
    perl annotate_variation.pl --downdb --webfrom annovar --buildver hg38 "$db" humandb/
done
# ClinVar snapshots, oldest first.
for snapshot in \
    20140702 20140902 20150330 20150629 20151201 20160302 \
    20161128 20170130 20170501 20170905 20180603 20190305 \
    20200316 20210123 20210501 20220320 20221231 20240611
do
    perl annotate_variation.pl --downdb --webfrom annovar --buildver hg38 "clinvar_$snapshot" humandb/
done
perl annotate_variation.pl --downdb --webfrom annovar --buildver hg38 regsnpintron humandb/
# rmsk is the one table in this list fetched with --webfrom ucsc instead of annovar.
perl annotate_variation.pl --downdb --webfrom ucsc --buildver hg38 rmsk humandb/
# Rebuild the index of the large tables to speed up annotation.
# Each entry is "<bin size>:<table name>"; the table lives at humandb/<table name>.txt.
#
# Not reindexed (kept disabled):
#   hg19_gnomad41_exome (bin 100), hg19_gnomad41_genome (bin 1000), hg19_gnomad_exome (bin 100)
#   hg38_gnomad41_genome (bin 1000), hg38_gnomad_exome (bin 100)
for spec in \
    1000:hg19_avsnp147 \
    50:hg19_dbnsfp47a \
    1000:hg19_gnomad_genome \
    1000:hg19_hrcr1 \
    10000:hg19_icgc28 \
    1000:hg19_kaviar_20150923 \
    1000:hg38_avsnp147 \
    50:hg38_dbnsfp47a \
    100:hg38_gnomad41_exome \
    1000:hg38_gnomad_genome \
    1000:hg38_hrcr1 \
    10000:hg38_icgc28 \
    1000:hg38_kaviar_20150923
do
    # ${spec%%:*} is the bin size, ${spec#*:} is the table name.
    perl index_annovar.pl --skipsort --bin "${spec%%:*}" "humandb/${spec#*:}.txt"
done
# Leave the annovar directory; the commands that follow use annovar/... paths.
cd ..
# Annotate the example VCF the way wANNOVAR did before 2024-07-12.
# The protocol and operation lists are positional pairs (one operation per protocol).
protocols='refGene,1000g2015aug_all,1000g2015aug_afr,1000g2015aug_amr,1000g2015aug_eas,1000g2015aug_eur,1000g2015aug_sas,exac03,esp6500siv2_all,esp6500siv2_aa,esp6500siv2_ea,nci60,avsnp151,cosmic70,clinvar_20170905,dbnsfp33a,gnomad_exome,gnomad_genome'
operations='g,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f'
perl annovar/table_annovar.pl annovar/example/ex2.vcf annovar/humandb/ \
    --outfile ex2 --buildver hg38 \
    --protocol "$protocols" \
    --operation "$operations" \
    --vcfinput --otherinfo \
    --thread "$(nproc)" --maxgenethread "$(nproc)"
# Annotate the example VCF the way wANNOVAR does after 2024-07-12.
# The protocol and operation lists are positional pairs (one operation per protocol).
protocols='refGeneWithVer,clinvar_20240611,dbnsfp47a,gnomad41_exome,gnomad41_genome'
operations='g,f,f,f,f'
perl annovar/table_annovar.pl annovar/example/ex2.vcf annovar/humandb/ \
    --outfile ex2 --buildver hg38 \
    --protocol "$protocols" \
    --operation "$operations" \
    --vcfinput --otherinfo \
    --thread "$(nproc)" --maxgenethread "$(nproc)"
# InterVar workflow.
# Step 1: annotate the example VCF with the protocol set used for InterVar.
protocols='refGene,esp6500siv2_all,1000g2015aug,avsnp147,dbnsfp42a,clinvar_20210501,gnomad_genome,dbscsnv11,rmsk,ensGene,knownGene'
operations='g,f,f,f,f,f,f,f,r,g,g'
perl annovar/table_annovar.pl annovar/example/ex2.vcf annovar/humandb/ \
    --outfile ex2 --buildver hg38 \
    --protocol "$protocols" \
    --operation "$operations" \
    --vcfinput --otherinfo \
    --thread "$(nproc)" --maxgenethread "$(nproc)"
# Step 2: get InterVar v2.2.1 and place a freshly downloaded OMIM mim2gene.txt
# into its database directory.
git clone https://github.com/WGLab/InterVar.git -b v2.2.1
wget https://omim.org/static/omim/data/mim2gene.txt
mv mim2gene.txt InterVar/intervardb
# Step 3: run InterVar on the ex2 files from step 1.
# NOTE(review): --skip_annovar presumably makes InterVar reuse the step-1
# annotation instead of re-running ANNOVAR - confirm against the InterVar docs.
python InterVar/Intervar.py \
    --input=ex2.avinput --output=ex2 --buildver=hg38 \
    --database_intervar=InterVar/intervardb \
    --table_annovar=annovar/table_annovar.pl \
    --convert2annovar=annovar/convert2annovar.pl \
    --annotate_variation=annovar/annotate_variation.pl \
    --database_locat=annovar/humandb \
    --skip_annovar
# Annotate the example VCF with every protocol in one run.
# The protocol and operation lists are positional pairs (one operation per protocol).
protocols='refGene,1000g2015aug_all,1000g2015aug_afr,1000g2015aug_amr,1000g2015aug_eas,1000g2015aug_eur,1000g2015aug_sas,exac03,esp6500siv2_all,esp6500siv2_aa,esp6500siv2_ea,nci60,avsnp147,cosmic70,clinvar_20240611,dbnsfp47a,gnomad_exome,gnomad_genome,dbscsnv11,rmsk,ensGene,knownGene,kaviar_20150923,hrcr1,regsnpintron,gene4denovo201907,icgc28,refGeneWithVer,gnomad41_exome,gnomad41_genome'
operations='g,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,f,r,g,g,f,f,f,f,f,g,f,f'
perl annovar/table_annovar.pl annovar/example/ex2.vcf annovar/humandb/ \
    --outfile ex2 --buildver hg38 \
    --protocol "$protocols" \
    --operation "$operations" \
    --vcfinput --otherinfo \
    --thread "$(nproc)" --maxgenethread "$(nproc)"
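# more speed-up: apply the diffs below to annotate_variation.pl and table_annovar.pl so that --mingenelinecount is accepted, then pass e.g. --mingenelinecount 1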
annotate_variation.pl
206c206
< 'thread=i'=>\$thread, 'maxgenethread=i'=>\$maxgenethread) or pod2usage ();
---
> 'thread=i'=>\$thread, 'maxgenethread=i'=>\$maxgenethread, 'mingenelinecount=i'=>\$mingenelinecount) or pod2usage ();
table_annovar.pl
15c15
< our ($outfile, $buildver, $remove, $checkfile, $protocol, $operation, $otherinfo, $onetranscript, $nastring, $genericdbfile, $gff3dbfile, $bedfile, $vcfdbfile, $csvout, $argument, $tempdir, $vcfinput, $dot2underline, $thread, $maxgenethread, $polishgene, $xreffile, $convertarg, $codingarg, $intronhgvs);
---
> our ($outfile, $buildver, $remove, $checkfile, $protocol, $operation, $otherinfo, $onetranscript, $nastring, $genericdbfile, $gff3dbfile, $bedfile, $vcfdbfile, $csvout, $argument, $tempdir, $vcfinput, $dot2underline, $thread, $maxgenethread, $mingenelinecount, $polishgene, $xreffile, $convertarg, $codingarg, $intronhgvs);
33c33
< 'thread=i'=>\$thread, 'maxgenethread=i'=>\$maxgenethread, 'polishgene!'=>\$polishgene, 'xreffile=s'=>\$xreffile, 'convertarg=s'=>\$convertarg, 'codingarg=s'=>\$codingarg,
---
> 'thread=i'=>\$thread, 'maxgenethread=i'=>\$maxgenethread, 'mingenelinecount=i'=>\$mingenelinecount, 'polishgene!'=>\$polishgene, 'xreffile=s'=>\$xreffile, 'convertarg=s'=>\$convertarg, 'codingarg=s'=>\$codingarg,
441a442,444
> if ($mingenelinecount) {
> $sc .= " -mingenelinecount $mingenelinecount";
> }
832a836
> --mingenelinecount <int> min line counts to enable threaded gene-based annotation (default: 1000000)
968a973,978
>
> =item B<--mingenelinecount>
>
> specify the minimum line counts to enable threaded gene-based annotation
> (default: 1000000). For input files with less lines, the threaded annotation
> will not be used, since it actually cost more time than non-threaded annotation.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment