Created
January 12, 2023 17:32
-
-
Save vinothpandian/d821b2ffd47682aa436a831e7e3e333e to your computer and use it in GitHub Desktop.
Medical NER - spaCy v3.4 - Config
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is an auto-generated partial config. To use it with 'spacy train'
# you can run spacy init fill-config to auto-fill all default settings:
# python -m spacy init fill-config ./base_config.cfg ./config.cfg
# Data locations. Other sections pull these in through ${paths.<key>}
# interpolation (corpora.train, corpora.dev, initialize.vectors).
# All three are null placeholders here.
# NOTE(review): presumably overridden when training is launched
# (e.g. --paths.train / --paths.dev) — confirm against how this config is invoked.
[paths]
train = null
dev = null
vectors = null
# GPU memory allocator selection for this transformer-based pipeline.
# NOTE(review): "pytorch" presumably routes GPU allocations through PyTorch's
# memory pool — confirm in the spaCy training docs.
[system]
gpu_allocator = "pytorch"
# Pipeline definition: English, with a transformer component followed by NER.
# Each name listed in `pipeline` is configured in its own [components.<name>] section.
[nlp]
lang = "en"
pipeline = ["transformer", "ner"]
batch_size = 128
[components]

# Transformer component built on the "roberta-base" model, using the
# spacy-transformers TransformerModel.v3 architecture.
[components.transformer]
factory = "transformer"

[components.transformer.model]
@architectures = "spacy-transformers.TransformerModel.v3"
name = "roberta-base"
tokenizer_config = {"use_fast": true}

# Strided span getter: window of 128 with a stride of 96.
# NOTE(review): presumably both are measured in tokens, so consecutive windows
# overlap — confirm in the spacy-transformers docs.
[components.transformer.model.get_spans]
@span_getters = "spacy-transformers.strided_spans.v1"
window = 128
stride = 96
# Named-entity recognizer: a transition-based parser architecture run with
# state_type "ner".
[components.ner]
factory = "ner"

[components.ner.model]
@architectures = "spacy.TransitionBasedParser.v2"
state_type = "ner"
extra_state_tokens = false
hidden_width = 64
maxout_pieces = 2
use_upper = false
# NOTE(review): nO left as null — presumably the output size is inferred from
# the label set at initialization; confirm in the spaCy architecture docs.
nO = null

# The NER model takes its token vectors from a TransformerListener instead of
# defining its own embedding layer.
[components.ner.model.tok2vec]
@architectures = "spacy-transformers.TransformerListener.v1"
grad_factor = 1.0

# Pooling layer applied by the listener (mean reduction).
[components.ner.model.tok2vec.pooling]
@layers = "reduce_mean.v1"
# Corpus readers. Both use spacy.Corpus.v1 and take their file locations from
# the [paths] section via interpolation.
[corpora]

[corpora.train]
@readers = "spacy.Corpus.v1"
path = ${paths.train}
# NOTE(review): 0 presumably means "no length limit" — confirm in the
# spacy.Corpus.v1 documentation.
max_length = 0

[corpora.dev]
@readers = "spacy.Corpus.v1"
path = ${paths.dev}
max_length = 0
# Training loop settings. `dev_corpus` and `train_corpus` are dotted names of
# the [corpora.dev] and [corpora.train] sections.
[training]
accumulate_gradient = 3
dev_corpus = "corpora.dev"
train_corpus = "corpora.train"

[training.optimizer]
@optimizers = "Adam.v1"

# Learning-rate schedule: linear warmup over 250 steps, 20000 total steps,
# initial rate 5e-5.
[training.optimizer.learn_rate]
@schedules = "warmup_linear.v1"
warmup_steps = 250
total_steps = 20000
initial_rate = 5e-5

# Batching by padded size.
# NOTE(review): `size` is presumably a padded-token budget per batch and
# `discard_oversize` drops examples exceeding it — confirm in the spaCy
# batcher docs.
[training.batcher]
@batchers = "spacy.batch_by_padded.v1"
discard_oversize = true
size = 2000
buffer = 256
# Initialization settings; the vectors value is taken from [paths] via
# interpolation (null in this file).
[initialize]
vectors = ${paths.vectors}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment