How to upload transformer weights and tokenizers from AllenNLP models to HuggingFace's model hub: step 1
from allennlp.common.params import Params
from allennlp.common.plugins import import_plugins
from allennlp.data.tokenizers import Tokenizer, PretrainedTransformerTokenizer
from allennlp.models import load_archive
from allennlp.modules.token_embedders import PretrainedTransformerEmbedder

# Change this to your serialization directory.
serialization_dir = "~/my-trained-model"

# Make sure all of the classes our model and tokenizer use are registered.
import_plugins()

# Load the archive from the serialization directory, which contains the trained
# model and the params.
archive = load_archive(serialization_dir + "/model.tar.gz")

# Pull out just the PretrainedTransformerEmbedder part of the model.
# You may need to adjust this line slightly depending on how your model is set up.
transformer_embedder: PretrainedTransformerEmbedder = \
    archive.model._source_embedder._token_embedders["tokens"]

# Now load the corresponding tokenizer.
# Again, you may need to adjust this line depending on how your config is set up.
tokenizer: PretrainedTransformerTokenizer = Tokenizer.from_params(
    archive.config["dataset_reader"]["source_tokenizer"]
)
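
With the embedder and tokenizer pulled out of the archive, the next step is to save them in HuggingFace's own format so they can be uploaded to the model hub. The sketch below is a minimal, hedged example: it assumes the AllenNLP wrappers expose the underlying HuggingFace objects as `transformer_embedder.transformer_model` and `tokenizer.tokenizer` (internal attribute names may differ across AllenNLP versions), and `hf_model_dir` is a hypothetical output directory of your choosing.

import os

# Hypothetical directory to hold the HuggingFace-format files.
hf_model_dir = os.path.expanduser("~/my-hf-model")
os.makedirs(hf_model_dir, exist_ok=True)

# Save the transformer weights and config in HuggingFace's format.
# Assumes the wrapped HF model is available as `.transformer_model`.
transformer_embedder.transformer_model.save_pretrained(hf_model_dir)

# Save the tokenizer files alongside the weights.
# Assumes the wrapped HF tokenizer is available as `.tokenizer`.
tokenizer.tokenizer.save_pretrained(hf_model_dir)

The resulting directory can then be uploaded to the model hub following HuggingFace's standard upload instructions.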