Created
July 18, 2016 07:46
-
-
Save holtgrewe/5517986d90fe551a76b6091340eda79e to your computer and use it in GitHub Desktop.
Jannovar sources INI file for updated
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; INI file with default data sources for Jannovar. | |
; | |
; This file is packed into the Jannovar JAR file. | |
; | |
; Each data source is described in one section. By convention, the section | |
; and data source name is given as "${organism}/${source}". Jannovar will | |
; serialize the file to "${name}".replace('/', '_').replace('\\', '_').
; | |
; Users can provide their own INI files using the --data-source parameter | |
; to Jannovar, even overriding the defaults from this file.
; | |
; Each section contains a "type" entry that will make Jannovar use the | |
; appropriate DataSource and JannovarDataFactory type for downloading and | |
; parsing. | |
; | |
; Name / Contig ID Mapping | |
; ======================== | |
; | |
; Independent of the type, the "chromInfo" and "chrToAccessions" entries are
; expected and give URLs to the "chromInfo.txt.gz" at UCSC describing the | |
; contig/chromosome lengths and "chrToAccessions" at RefSeq with the | |
; official contig names and RefSeq/GenBank identifiers. | |
; | |
; Transcript Files | |
; ================ | |
; | |
; Keys for the types: | |
; | |
; * ucsc: knownCanonical, knownGene, knownGeneMrna, kgXref, knownToLocusLink
; * ensembl: gtf, cdna, ncrna
; * refseq: gff, rna
; | |
; Aliasing | |
; ======== | |
; | |
; The mitochondrial chromosome has the key "MT" in RefSeq but "M" in UCSC. | |
; Thus, we allow manual aliases to be defined; they are applied after loading the
; chrToAccessions file from RefSeq and before loading the chromInfo file from | |
; UCSC. Each line has the "alias" key. The value "MT,M,chrM" means that the | |
; contig names "M" and "chrM" are aliased to the contig name "MT" that is | |
; already known from the chrToAccessions file. | |
; | |
; Note that all alias lines are processed in linear order. If the name in the | |
; first entry of the list is not known yet then it is created with the next | |
; free number. This is useful for adding the missing MT chromosome
; entry for hg18, for example. | |
; --------------------------------------------------------------------------- | |
; hg18/NCBI36
; --------------------------------------------------------------------------- | |
; Human hg18 transcripts from the UCSC known-gene tables.
[hg18/ucsc]
type=ucsc
; Contig names "M" and "chrM" are aliases of "MT".
alias=MT,M,chrM
; Contig lengths from UCSC; contig names and accessions from RefSeq.
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/hg18/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/BUILD.36.3/Assembled_chromosomes/chr_NC_gi
chrToAccessions.format=chr_NC_gi
chrToAccessions.matchLast=HuRef
; UCSC transcript tables.
knownCanonical=http://hgdownload.soe.ucsc.edu/goldenPath/hg18/database/knownCanonical.txt.gz
knownGene=http://hgdownload.soe.ucsc.edu/goldenPath/hg18/database/knownGene.txt.gz
knownGeneMrna=http://hgdownload.soe.ucsc.edu/goldenPath/hg18/database/knownGeneMrna.txt.gz
kgXref=http://hgdownload.soe.ucsc.edu/goldenPath/hg18/database/kgXref.txt.gz
knownToLocusLink=http://hgdownload.soe.ucsc.edu/goldenPath/hg18/database/knownToLocusLink.txt.gz
; Human hg18 transcripts from Ensembl (release 54, NCBI36).
[hg18/ensembl]
type=ensembl
; Contig names "M" and "chrM" are aliases of "MT".
alias=MT,M,chrM
; Contig lengths from UCSC; contig names and accessions from RefSeq.
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/hg18/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/BUILD.36.3/Assembled_chromosomes/chr_NC_gi
chrToAccessions.format=chr_NC_gi
chrToAccessions.matchLast=HuRef
; Ensembl annotation (GTF) and cDNA sequences (FASTA).
gtf=ftp://ftp.ensembl.org/pub/release-54/gtf/homo_sapiens/Homo_sapiens.NCBI36.54.gtf.gz
cdna=ftp://ftp.ensembl.org/pub/release-54/fasta/homo_sapiens/cdna/Homo_sapiens.NCBI36.54.cdna.all.fa.gz
; Human hg18 transcripts from RefSeq (NCBI build 36.3).
[hg18/refseq]
type=refseq
; Contig names "M" and "chrM" are aliases of "MT".
alias=MT,M,chrM
; Contig lengths from UCSC; contig names and accessions from RefSeq.
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/hg18/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/BUILD.36.3/Assembled_chromosomes/chr_NC_gi
chrToAccessions.format=chr_NC_gi
chrToAccessions.matchLast=HuRef
; RefSeq annotation (GFF3) and transcript RNA sequences (FASTA).
gff=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/BUILD.36.3/GFF/ref_NCBI36_top_level.gff3.gz
rna=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/BUILD.36.3/RNA/rna.fa.gz
; Human hg18 transcripts from RefSeq (NCBI build 36.3), curated data sets only.
[hg18/refseq_curated]
type=refseq
; Contig names "M" and "chrM" are aliases of "MT".
alias=MT,M,chrM
; Restrict the transcript set to curated entries.
onlyCurated=true
; Contig lengths from UCSC; contig names and accessions from RefSeq.
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/hg18/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/BUILD.36.3/Assembled_chromosomes/chr_NC_gi
chrToAccessions.format=chr_NC_gi
chrToAccessions.matchLast=HuRef
; RefSeq annotation (GFF3) and transcript RNA sequences (FASTA).
gff=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/BUILD.36.3/GFF/ref_NCBI36_top_level.gff3.gz
rna=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/BUILD.36.3/RNA/rna.fa.gz
; --------------------------------------------------------------------------- | |
; hg19/GRCh37 | |
; --------------------------------------------------------------------------- | |
; Human hg19 (GRCh37) transcripts from the UCSC known-gene tables.
[hg19/ucsc]
type=ucsc
; Contig names "M" and "chrM" are aliases of "MT".
alias=MT,M,chrM
; Contig lengths from UCSC; contig names and accessions from RefSeq.
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13
chrToAccessions.format=chr_accessions
; UCSC transcript tables.
knownCanonical=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/knownCanonical.txt.gz
knownGene=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/knownGene.txt.gz
knownGeneMrna=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/knownGeneMrna.txt.gz
kgXref=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/kgXref.txt.gz
knownToLocusLink=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/knownToLocusLink.txt.gz
; Human hg19 (GRCh37) transcripts from Ensembl (release 74).
[hg19/ensembl]
type=ensembl
; Contig names "M" and "chrM" are aliases of "MT".
alias=MT,M,chrM
; Contig lengths from UCSC; contig names and accessions from RefSeq.
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13
chrToAccessions.format=chr_accessions
; Ensembl annotation (GTF) and cDNA sequences (FASTA).
gtf=ftp://ftp.ensembl.org/pub/release-74/gtf/homo_sapiens/Homo_sapiens.GRCh37.74.gtf.gz
cdna=ftp://ftp.ensembl.org/pub/release-74/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh37.74.cdna.all.fa.gz
; Human hg19 (GRCh37.p13) transcripts from RefSeq (annotation release 105).
[hg19/refseq]
type=refseq
; Contig names "M" and "chrM" are aliases of "MT".
alias=MT,M,chrM
; Contig lengths from UCSC; contig names and accessions from RefSeq.
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13
chrToAccessions.format=chr_accessions
; RefSeq annotation (GFF3) and transcript RNA sequences (FASTA).
gff=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/GFF/ref_GRCh37.p13_top_level.gff3.gz
rna=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/RNA/rna.fa.gz
; Human hg19 (GRCh37.p13) transcripts from RefSeq (annotation release 105),
; curated data sets only.
[hg19/refseq_curated]
type=refseq
; Contig names "M" and "chrM" are aliases of "MT".
alias=MT,M,chrM
; Restrict the transcript set to curated entries.
onlyCurated=true
; Contig lengths from UCSC; contig names and accessions from RefSeq.
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13
chrToAccessions.format=chr_accessions
; RefSeq annotation (GFF3) and transcript RNA sequences (FASTA).
gff=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/GFF/ref_GRCh37.p13_top_level.gff3.gz
rna=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/RNA/rna.fa.gz
; --------------------------------------------------------------------------- | |
; hg38/GRCh38 | |
; --------------------------------------------------------------------------- | |
; Human hg38 (GRCh38) transcripts from the UCSC known-gene tables.
[hg38/ucsc]
type=ucsc
; Contig names "M" and "chrM" are aliases of "MT".
alias=MT,M,chrM
; Contig lengths from UCSC; contig names and accessions from RefSeq.
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/Assembled_chromosomes/chr_accessions_GRCh38.p7
chrToAccessions.format=chr_accessions
; UCSC transcript tables.
knownCanonical=http://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/knownCanonical.txt.gz
knownGene=http://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/knownGene.txt.gz
knownGeneMrna=http://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/knownGeneMrna.txt.gz
kgXref=http://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/kgXref.txt.gz
knownToLocusLink=http://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/knownToLocusLink.txt.gz
; Human hg38 (GRCh38) transcripts from Ensembl (release 78).
[hg38/ensembl]
type=ensembl
; Contig names "M" and "chrM" are aliases of "MT".
alias=MT,M,chrM
; Contig lengths from UCSC; contig names and accessions from RefSeq.
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/Assembled_chromosomes/chr_accessions_GRCh38.p7
chrToAccessions.format=chr_accessions
; Ensembl annotation (GTF) and cDNA sequences (FASTA).
; NOTE(review): unlike the GTF, the cDNA file name carries no release number;
; this appears intentional for this release — confirm against the FTP listing.
gtf=ftp://ftp.ensembl.org/pub/release-78/gtf/homo_sapiens/Homo_sapiens.GRCh38.78.gtf.gz
cdna=ftp://ftp.ensembl.org/pub/release-78/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz
; Human hg38 (GRCh38.p7) transcripts from RefSeq (current, non-archived release).
[hg38/refseq]
type=refseq
; Contig names "M" and "chrM" are aliases of "MT".
alias=MT,M,chrM
; Contig lengths from UCSC; contig names and accessions from RefSeq.
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/Assembled_chromosomes/chr_accessions_GRCh38.p7
chrToAccessions.format=chr_accessions
; RefSeq annotation (GFF3) and transcript RNA sequences (FASTA).
; The GFF patch level must match chrToAccessions (p7): both live in the same
; non-archived release directory.
; NOTE(review): previously ref_GRCh38.p2 — confirm the p7 file name on the FTP site.
gff=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/GFF/ref_GRCh38.p7_top_level.gff3.gz
rna=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/RNA/rna.fa.gz
; Human hg38 (GRCh38.p7) transcripts from RefSeq (current, non-archived release),
; curated data sets only.
[hg38/refseq_curated]
type=refseq
; Contig names "M" and "chrM" are aliases of "MT".
alias=MT,M,chrM
; Restrict the transcript set to curated entries.
onlyCurated=true
; Contig lengths from UCSC; contig names and accessions from RefSeq.
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/hg38/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/Assembled_chromosomes/chr_accessions_GRCh38.p7
chrToAccessions.format=chr_accessions
; RefSeq annotation (GFF3) and transcript RNA sequences (FASTA).
; The GFF patch level must match chrToAccessions (p7): both live in the same
; non-archived release directory.
; NOTE(review): previously ref_GRCh38.p2 — confirm the p7 file name on the FTP site.
gff=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/GFF/ref_GRCh38.p7_top_level.gff3.gz
rna=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/RNA/rna.fa.gz
; --------------------------------------------------------------------------- | |
; mm9 | |
; --------------------------------------------------------------------------- | |
; Mouse mm9 transcripts from the UCSC known-gene tables.
[mm9/ucsc]
type=ucsc
; Contig names "M" and "chrM" are aliases of "MT".
alias=MT,M,chrM
; Contig lengths from UCSC; contig names and accessions from RefSeq.
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/mm9/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus/ARCHIVE/BUILD.37.2/Assembled_chromosomes/chr_accessions_MGSCv37
chrToAccessions.format=chr_accessions
; UCSC transcript tables.
knownCanonical=http://hgdownload.soe.ucsc.edu/goldenPath/mm9/database/knownCanonical.txt.gz
knownGene=http://hgdownload.soe.ucsc.edu/goldenPath/mm9/database/knownGene.txt.gz
knownGeneMrna=http://hgdownload.soe.ucsc.edu/goldenPath/mm9/database/knownGeneMrna.txt.gz
kgXref=http://hgdownload.soe.ucsc.edu/goldenPath/mm9/database/kgXref.txt.gz
knownToLocusLink=http://hgdownload.soe.ucsc.edu/goldenPath/mm9/database/knownToLocusLink.txt.gz
; Mouse mm9 (NCBIM37) transcripts from Ensembl (release 67).
[mm9/ensembl]
type=ensembl
; Contig names "M" and "chrM" are aliases of "MT".
alias=MT,M,chrM
; Contig lengths from UCSC; contig names and accessions from RefSeq.
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/mm9/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus/ARCHIVE/BUILD.37.2/Assembled_chromosomes/chr_accessions_MGSCv37
chrToAccessions.format=chr_accessions
; Ensembl annotation (GTF) and cDNA sequences (FASTA).
gtf=ftp://ftp.ensembl.org/pub/release-67/gtf/mus_musculus/Mus_musculus.NCBIM37.67.gtf.gz
cdna=ftp://ftp.ensembl.org/pub/release-67/fasta/mus_musculus/cdna/Mus_musculus.NCBIM37.67.cdna.all.fa.gz
; Mouse mm9 (MGSCv37) transcripts from RefSeq (NCBI build 37.2).
[mm9/refseq]
type=refseq
; Contig names "M" and "chrM" are aliases of "MT".
alias=MT,M,chrM
; Contig lengths from UCSC; contig names and accessions from RefSeq.
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/mm9/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus/ARCHIVE/BUILD.37.2/Assembled_chromosomes/chr_accessions_MGSCv37
chrToAccessions.format=chr_accessions
; RefSeq annotation (GFF3) and transcript RNA sequences (FASTA).
gff=ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus/ARCHIVE/BUILD.37.2/GFF/ref_MGSCv37_top_level.gff3.gz
rna=ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus/ARCHIVE/BUILD.37.2/RNA/rna.fa.gz
; Mouse mm9 (MGSCv37) transcripts from RefSeq (NCBI build 37.2),
; curated data sets only.
[mm9/refseq_curated]
type=refseq
; Contig names "M" and "chrM" are aliases of "MT".
alias=MT,M,chrM
; Restrict the transcript set to curated entries.
onlyCurated=true
; Contig lengths from UCSC; contig names and accessions from RefSeq.
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/mm9/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus/ARCHIVE/BUILD.37.2/Assembled_chromosomes/chr_accessions_MGSCv37
chrToAccessions.format=chr_accessions
; RefSeq annotation (GFF3) and transcript RNA sequences (FASTA).
gff=ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus/ARCHIVE/BUILD.37.2/GFF/ref_MGSCv37_top_level.gff3.gz
rna=ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus/ARCHIVE/BUILD.37.2/RNA/rna.fa.gz
; --------------------------------------------------------------------------- | |
; mm10 | |
; --------------------------------------------------------------------------- | |
; Mouse mm10 (GRCm38) transcripts from the UCSC known-gene tables.
[mm10/ucsc]
type=ucsc
; Contig names "M" and "chrM" are aliases of "MT".
alias=MT,M,chrM
; Contig lengths from UCSC; contig names and accessions from RefSeq.
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/mm10/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/Mus_musculus/Assembled_chromosomes/chr_accessions_GRCm38.p3
chrToAccessions.format=chr_accessions
; UCSC transcript tables.
knownCanonical=http://hgdownload.soe.ucsc.edu/goldenPath/mm10/database/knownCanonical.txt.gz
knownGene=http://hgdownload.soe.ucsc.edu/goldenPath/mm10/database/knownGene.txt.gz
knownGeneMrna=http://hgdownload.soe.ucsc.edu/goldenPath/mm10/database/knownGeneMrna.txt.gz
kgXref=http://hgdownload.soe.ucsc.edu/goldenPath/mm10/database/kgXref.txt.gz
knownToLocusLink=http://hgdownload.soe.ucsc.edu/goldenPath/mm10/database/knownToLocusLink.txt.gz
; Mouse mm10 (GRCm38) transcripts from Ensembl (release 74).
[mm10/ensembl]
type=ensembl
; Contig names "M" and "chrM" are aliases of "MT".
alias=MT,M,chrM
; Contig lengths from UCSC; contig names and accessions from RefSeq.
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/mm10/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/Mus_musculus/Assembled_chromosomes/chr_accessions_GRCm38.p3
chrToAccessions.format=chr_accessions
; Ensembl annotation (GTF) and cDNA sequences (FASTA).
gtf=ftp://ftp.ensembl.org/pub/release-74/gtf/mus_musculus/Mus_musculus.GRCm38.74.gtf.gz
cdna=ftp://ftp.ensembl.org/pub/release-74/fasta/mus_musculus/cdna/Mus_musculus.GRCm38.74.cdna.all.fa.gz
; Mouse mm10 (GRCm38.p3) transcripts from RefSeq (current, non-archived release).
[mm10/refseq]
type=refseq
; Contig names "M" and "chrM" are aliases of "MT".
alias=MT,M,chrM
; Contig lengths from UCSC; contig names and accessions from RefSeq.
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/mm10/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/Mus_musculus/Assembled_chromosomes/chr_accessions_GRCm38.p3
chrToAccessions.format=chr_accessions
; RefSeq annotation (GFF3) and transcript RNA sequences (FASTA).
; NOTE(review): these URLs use the "M_musculus" directory while chrToAccessions
; uses "Mus_musculus" — confirm both spellings resolve on the FTP site.
gff=ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus/GFF/ref_GRCm38.p3_top_level.gff3.gz
rna=ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus/RNA/rna.fa.gz
; Mouse mm10 (GRCm38.p3) transcripts from RefSeq (current, non-archived release),
; curated data sets only.
[mm10/refseq_curated]
type=refseq
; Contig names "M" and "chrM" are aliases of "MT".
alias=MT,M,chrM
; Restrict the transcript set to curated entries.
onlyCurated=true
; Contig lengths from UCSC; contig names and accessions from RefSeq.
chromInfo=http://hgdownload.soe.ucsc.edu/goldenPath/mm10/database/chromInfo.txt.gz
chrToAccessions=ftp://ftp.ncbi.nlm.nih.gov/genomes/Mus_musculus/Assembled_chromosomes/chr_accessions_GRCm38.p3
chrToAccessions.format=chr_accessions
; RefSeq annotation (GFF3) and transcript RNA sequences (FASTA).
; NOTE(review): these URLs use the "M_musculus" directory while chrToAccessions
; uses "Mus_musculus" — confirm both spellings resolve on the FTP site.
gff=ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus/GFF/ref_GRCm38.p3_top_level.gff3.gz
rna=ftp://ftp.ncbi.nlm.nih.gov/genomes/M_musculus/RNA/rna.fa.gz
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment