Last active
April 13, 2024 01:06
-
-
Save iamaziz/4f3da44a064ac99e4328962330488192 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
from langchain_community.embeddings import ( | |
OpenAIEmbeddings, | |
OllamaEmbeddings, | |
HuggingFaceEmbeddings, | |
HuggingFaceBgeEmbeddings | |
) | |
def embedding_func(selected_embedding: str = "HuggingFaceEmbeddings"):
    """
    Create and return an embedding object based on the selected embedding type.

    This function supports the creation of different embedding representations
    for text data using pre-trained models from OpenAI, Ollama, and HuggingFace.

    Parameters:
    - selected_embedding (str): The type of embedding to use. Supported values are:
        - "OpenAIEmbeddings": Use embeddings from OpenAI (constructed with no arguments).
        - "OllamaEmbeddings": Use embeddings from Ollama with the "nomic-embed-text:v1.5" model.
        - "HuggingFaceEmbeddings": Use embeddings from HuggingFace with the "BAAI/bge-small-en-v1.5" model,
          placed on "cuda" if available, otherwise "mps" if available, otherwise "cpu".
      Note: "HuggingFaceBgeEmbeddings" is not handled by this function; passing it
      raises ValueError like any other unrecognized value.

    Returns:
    - An instance of the selected embedding class.

    Raises:
    - ValueError: If `selected_embedding` is not one of the supported values.

    References:
    - https://github.com/huggingface/text-embeddings-inference?tab=readme-ov-file#get-started
    - https://huggingface.co/BAAI/bge-m3
    - https://ollama.com/library/nomic-embed-text
    """
    if selected_embedding == "OpenAIEmbeddings":
        return OpenAIEmbeddings()

    if selected_embedding == "OllamaEmbeddings":
        return OllamaEmbeddings(model="nomic-embed-text:v1.5")

    if selected_embedding == "HuggingFaceEmbeddings":
        # Look the MPS backend up defensively: PyTorch builds without
        # `torch.backends.mps` would otherwise raise AttributeError here
        # instead of falling back to the CPU.
        mps_backend = getattr(torch.backends, "mps", None)
        if torch.cuda.is_available():
            device = "cuda"
        elif mps_backend is not None and mps_backend.is_available():
            device = "mps"
        else:
            device = "cpu"
        return HuggingFaceEmbeddings(
            model_name="BAAI/bge-small-en-v1.5",
            model_kwargs={"device": device},
        )

    supported = ("OpenAIEmbeddings", "OllamaEmbeddings", "HuggingFaceEmbeddings")
    raise ValueError(
        f"unknown embedding type {selected_embedding!r}; "
        f"supported values are: {', '.join(supported)}"
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment