davidkyle · November 8, 2023 10:33
diff --git a/upload_local_elser.py b/upload_local_elser.py
 from elasticsearch import Elasticsearch

 from eland.ml.pytorch import PyTorchModel
 from eland.ml.pytorch.nlp_ml_model import (
    TextExpansionInferenceOptions, 
    NlpTrainedModelConfig, 
    TrainedModelInput, 
    NlpBertTokenizationConfig
 )

 """
 Use this script to install a local copy of the ELSER model in Elasticsearch.

 The ELSER model and vocabulary files must be downloaded and stored locally,
 point this script at those files by modifying the hard coded `model_path` 
 and `vocab_path` variables. 

 Once the path variables and Elasticsearch connection settings have been
 updated run the script `python3 upload_local_elser.py`

 The ELSER v2 model can be downloaded from:
 Model File - https://ml-models.elastic.co/elser_model_2.pt
 Vocabulary File - https://ml-models.elastic.co/elser_model_2.vocab.json

 The platform-specific version optimised for X86 Linux can be downloaded from:
 Model File - https://ml-models.elastic.co/elser_model_2_linux-x86_64.pt
 Vocabulary File - https://ml-models.elastic.co/elser_model_2_linux-x86_64.vocab.json

 This script requires the Eland package to be installed: https://github.com/elastic/eland
 """
 def get_es_client():
    """
    Create the Elasticsearch client used by this script.

    The host URL and the credentials are hard coded below, update them
    for your cluster before running the script. The response of the
    client's `info()` call is printed before the client is returned.
    """
    client = Elasticsearch(
        hosts='http://localhost:9200',
        basic_auth=('elastic-admin', 'elastic-password'),
        request_timeout=300,
        verify_certs=False,
    )

    # Printing the info response doubles as a quick connectivity check.
    print(client.info())
    return client

 if __name__ == "__main__":
    # Imported here because only the script entry point needs it.
    import os

    # Locations of the downloaded ELSER files, edit these before running.
    model_path = '/PATH/TO/elser_model_2_XXX.pt'
    vocab_path = '/PATH/TO/elser_model_2.vocab.json'

    # Fail fast, before anything is created in the cluster: if a path is
    # wrong the upload would otherwise stop after `put_config` has already
    # created the model configuration.
    for local_file in (model_path, vocab_path):
        if not os.path.isfile(local_file):
            raise FileNotFoundError(
                f"'{local_file}' does not exist, update `model_path` and "
                "`vocab_path` to point at the downloaded ELSER files"
            )

    es = get_es_client()
    ptm = PyTorchModel(es, 'elser-local')

    tokenization_config = NlpBertTokenizationConfig(
        truncate='first',
        do_lower_case=True,
        with_special_tokens=True,
        max_sequence_length=512,
    )
    inference_config = TextExpansionInferenceOptions(tokenization=tokenization_config)
    model_config = NlpTrainedModelConfig(
        description="ELSER from local upload",
        model_type="pytorch",
        inference_config=inference_config,
        input=TrainedModelInput(
            field_names=["text_field"],
        ),
    )

    # Upload order: configuration first, then vocabulary, then the model
    # definition itself.
    print(f"Creating model with id '{ptm.model_id}'")
    ptm.put_config(config=model_config)

    print("Uploading model vocabulary")
    ptm.put_vocab(vocab_path)

    print("Uploading model definition")
    ptm.put_model(model_path)
	from elasticsearch import Elasticsearch

	from eland.ml.pytorch import PyTorchModel
	from eland.ml.pytorch.nlp_ml_model import (
	TextExpansionInferenceOptions,
	NlpTrainedModelConfig,
	TrainedModelInput,
	NlpBertTokenizationConfig
	)

	"""
	Use this script to install a local copy of the ELSER model in Elasticsearch.

	The ELSER model and vocabulary files must be downloaded and stored locally,
	point this script at those files by modifying the hard coded `model_path`
	and `vocab_path` variables.

	Once the path variables and Elasticsearch connection settings have been
	updated run the script `python3 upload_local_elser.py`

	The ELSER v2 model can be downloaded from:
	Model File - https://ml-models.elastic.co/elser_model_2.pt
	Vocabulary File - https://ml-models.elastic.co/elser_model_2.vocab.json

	The platform-specific version optimised for X86 Linux can be downloaded from:
	Model File - https://ml-models.elastic.co/elser_model_2_linux-x86_64.pt
	Vocabulary File - https://ml-models.elastic.co/elser_model_2_linux-x86_64.vocab.json

	This script requires the Eland package to be installed: https://github.com/elastic/eland
	"""
	def get_es_client():
	    """
	    Create the Elasticsearch client used by this script.

	    The host URL and the credentials are hard coded below, update them
	    for your cluster before running the script. The response of the
	    client's `info()` call is printed before the client is returned.
	    """
	    client = Elasticsearch(
	        hosts='http://localhost:9200',
	        basic_auth=('elastic-admin', 'elastic-password'),
	        request_timeout=300,
	        verify_certs=False,
	    )

	    # Printing the info response doubles as a quick connectivity check.
	    print(client.info())
	    return client

	if __name__ == "__main__":
	    # Imported here because only the script entry point needs it.
	    import os

	    # Locations of the downloaded ELSER files, edit these before running.
	    model_path = '/PATH/TO/elser_model_2_XXX.pt'
	    vocab_path = '/PATH/TO/elser_model_2.vocab.json'

	    # Fail fast, before anything is created in the cluster: if a path is
	    # wrong the upload would otherwise stop after `put_config` has already
	    # created the model configuration.
	    for local_file in (model_path, vocab_path):
	        if not os.path.isfile(local_file):
	            raise FileNotFoundError(
	                f"'{local_file}' does not exist, update `model_path` and "
	                "`vocab_path` to point at the downloaded ELSER files"
	            )

	    es = get_es_client()
	    ptm = PyTorchModel(es, 'elser-local')

	    tokenization_config = NlpBertTokenizationConfig(
	        truncate='first',
	        do_lower_case=True,
	        with_special_tokens=True,
	        max_sequence_length=512,
	    )
	    inference_config = TextExpansionInferenceOptions(tokenization=tokenization_config)
	    model_config = NlpTrainedModelConfig(
	        description="ELSER from local upload",
	        model_type="pytorch",
	        inference_config=inference_config,
	        input=TrainedModelInput(
	            field_names=["text_field"],
	        ),
	    )

	    # Upload order: configuration first, then vocabulary, then the model
	    # definition itself.
	    print(f"Creating model with id '{ptm.model_id}'")
	    ptm.put_config(config=model_config)

	    print("Uploading model vocabulary")
	    ptm.put_vocab(vocab_path)

	    print("Uploading model definition")
	    ptm.put_model(model_path)