Skip to content

Instantly share code, notes, and snippets.

@iamaziz
Last active April 13, 2024 01:06
Show Gist options
  • Save iamaziz/4f3da44a064ac99e4328962330488192 to your computer and use it in GitHub Desktop.
Save iamaziz/4f3da44a064ac99e4328962330488192 to your computer and use it in GitHub Desktop.
import torch
from langchain_community.embeddings import (
OpenAIEmbeddings,
OllamaEmbeddings,
HuggingFaceEmbeddings,
HuggingFaceBgeEmbeddings
)
def embedding_func(selected_embedding: str = "HuggingFaceEmbeddings"):
    """
    Create and return an embedding object based on the selected embedding type.

    This function builds one of several text-embedding backends (OpenAI,
    Ollama, or HuggingFace) from a string selector.

    Parameters:
    - selected_embedding (str): The type of embedding to use. Supported values are:
        - "OpenAIEmbeddings": `OpenAIEmbeddings()` constructed with its defaults.
        - "OllamaEmbeddings": `OllamaEmbeddings` with the
          "nomic-embed-text:v1.5" model.
        - "HuggingFaceEmbeddings" (default): `HuggingFaceEmbeddings` with the
          "BAAI/bge-small-en-v1.5" model, placed on "cuda" if available,
          otherwise "mps" if available, otherwise "cpu".
      Note: "HuggingFaceBgeEmbeddings" is imported by this module but is not
      wired up here; selecting it raises ValueError like any other unknown name.

    Returns:
    - An instance of the selected embedding class.

    Raises:
    - ValueError: If `selected_embedding` is not one of the supported values.

    References:
    - https://github.com/huggingface/text-embeddings-inference?tab=readme-ov-file#get-started
    - https://huggingface.co/BAAI/bge-m3
    - https://ollama.com/library/nomic-embed-text
    """
    supported = ("OpenAIEmbeddings", "OllamaEmbeddings", "HuggingFaceEmbeddings")

    # Validate up front so a bad selector fails fast with an actionable message,
    # before any backend is constructed.
    if selected_embedding not in supported:
        raise ValueError(
            f"unknown embedding type {selected_embedding!r}; "
            f"expected one of: {', '.join(supported)}"
        )

    if selected_embedding == "OpenAIEmbeddings":
        return OpenAIEmbeddings()

    if selected_embedding == "OllamaEmbeddings":
        return OllamaEmbeddings(model="nomic-embed-text:v1.5")

    # Remaining case: "HuggingFaceEmbeddings". Pick the best available device.
    # `torch.backends.mps` is looked up defensively because older torch builds
    # do not expose that attribute at all.
    mps_backend = getattr(torch.backends, "mps", None)
    if torch.cuda.is_available():
        device = "cuda"
    elif mps_backend is not None and mps_backend.is_available():
        device = "mps"
    else:
        device = "cpu"

    return HuggingFaceEmbeddings(
        model_name="BAAI/bge-small-en-v1.5",
        model_kwargs={"device": device},
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment