Skip to content

Instantly share code, notes, and snippets.

@davidkyle
Created November 8, 2023 10:33
Show Gist options
  • Save davidkyle/8b543bd079789041fa3446d1fe7ab349 to your computer and use it in GitHub Desktop.
Save davidkyle/8b543bd079789041fa3446d1fe7ab349 to your computer and use it in GitHub Desktop.
Script to install a local copy of the ELSER model in Elasticsearch
from elasticsearch import Elasticsearch
from eland.ml.pytorch import PyTorchModel
from eland.ml.pytorch.nlp_ml_model import (
TextExpansionInferenceOptions,
NlpTrainedModelConfig,
TrainedModelInput,
NlpBertTokenizationConfig
)
"""
Use this script to install a local copy of the ELSER model in Elasticsearch.
The ELSER model and vocabulary files must be downloaded and stored locally,
point this script at those files by modifying the hard coded `model_path`
and `vocab_path` variables.
Once the path variables and Elasticsearch connection settings have been
updated, run the script with `python3 upload_local_elser.py`.
The ELSER v2 model can be downloaded from:
Model File - https://ml-models.elastic.co/elser_model_2.pt
Vocabulary File - https://ml-models.elastic.co/elser_model_2.vocab.json
The platform-specific version optimised for x86 Linux can be downloaded from:
Model File - https://ml-models.elastic.co/elser_model_2_linux-x86_64.pt
Vocabulary File - https://ml-models.elastic.co/elser_model_2_linux-x86_64.vocab.json
This script requires the Eland package to be installed: https://github.com/elastic/eland
"""
def get_es_client(
    hosts='http://localhost:9200',
    basic_auth=('elastic-admin', 'elastic-password'),
    request_timeout=300,
    verify_certs=False,
):
    """
    Create an Elasticsearch client and print the cluster info.

    The default host URL and credentials are placeholders; update them
    (or pass your own values) for your cluster.

    Args:
        hosts: URL of the Elasticsearch node to connect to.
        basic_auth: ``(username, password)`` tuple used for basic auth.
        request_timeout: Request timeout passed to the client
            (presumably seconds; confirm against the client docs).
        verify_certs: Passed straight to the client. Defaults to False,
            as in the original script; set it to True when connecting
            over HTTPS to a cluster with a trusted certificate.

    Returns:
        The constructed ``Elasticsearch`` client.
    """
    es_client = Elasticsearch(
        hosts=hosts,
        basic_auth=basic_auth,
        request_timeout=request_timeout,
        verify_certs=verify_certs,
    )
    # Printing the cluster info makes a request straight away, so a wrong
    # host or bad credentials show up here rather than mid-upload.
    print(es_client.info())
    return es_client
if __name__ == "__main__":
    # Local import: only the script entry point needs pathlib.
    from pathlib import Path

    # Update these to point at the locally downloaded ELSER model and
    # vocabulary files.
    model_path = '/PATH/TO/elser_model_2_XXX.pt'
    vocab_path = '/PATH/TO/elser_model_2.vocab.json'

    # Check the files before contacting the cluster: otherwise the model
    # config would be created first and the upload would then fail on a
    # missing file, leaving a model with no vocabulary or definition.
    missing = [p for p in (model_path, vocab_path) if not Path(p).is_file()]
    if missing:
        raise SystemExit(
            "File(s) not found: " + ", ".join(missing)
            + " - update `model_path` and `vocab_path` in this script"
        )

    es = get_es_client()
    ptm = PyTorchModel(es, 'elser-local')

    # ELSER is configured here as a text expansion model with BERT
    # tokenization; inputs are lower-cased and truncated to 512 tokens.
    tokenization_config = NlpBertTokenizationConfig(
        truncate='first',
        do_lower_case=True,
        with_special_tokens=True,
        max_sequence_length=512,
    )
    inference_config = TextExpansionInferenceOptions(
        tokenization=tokenization_config,
    )
    model_config = NlpTrainedModelConfig(
        description="ELSER from local upload",
        model_type="pytorch",
        inference_config=inference_config,
        input=TrainedModelInput(
            field_names=["text_field"],
        ),
    )

    # Order matters: the config is created first, then the vocabulary and
    # the model definition are uploaded for it.
    print(f"Creating model with id '{ptm.model_id}'")
    ptm.put_config(config=model_config)

    print("Uploading model vocabulary")
    ptm.put_vocab(vocab_path)

    print("Uploading model definition")
    ptm.put_model(model_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment