Examples with Redis Vector and ChromaDB
Embeddings via ru-en-RoSBERTa https://huggingface.co/ai-forever/ru-en-RoSBERTa

```python
import torch
from langchain_core.embeddings import Embeddings
from sentence_transformers import SentenceTransformer


class RuEnRoSBERTaEmbeddings(Embeddings):
    def __init__(
        self,
        model_name: str = "ai-forever/ru-en-RoSBERTa",
        device: str | None = None,
        use_prompt_name: bool = True,  # set to False if your sentence-transformers < 2.4.0
        normalize: bool = True,
        batch_size: int = 32,
    ):
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.model = SentenceTransformer(model_name, device=self.device)
        self.use_prompt_name = use_prompt_name
        self.normalize = normalize
        self.batch_size = batch_size

    def _encode(self, texts: list[str], prompt_name: str) -> list[list[float]]:
        kwargs = {
            "batch_size": self.batch_size,
            "convert_to_numpy": True,
        }
        if self.normalize:
            kwargs["normalize_embeddings"] = True
        if self.use_prompt_name:
            # Requires sentence-transformers >= 2.4.0
            kwargs["prompt_name"] = prompt_name
            encoded = self.model.encode(texts, **kwargs)
        else:
            # Fallback: manually prepend prefixes
            prefix = {
                "search_query": "search_query: ",
                "search_document": "search_document: ",
            }[prompt_name]
            prefixed = [prefix + t for t in texts]
            encoded = self.model.encode(prefixed, **kwargs)
        return encoded.tolist()

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        # Use "search_document" for indexing documents
        return self._encode(texts, prompt_name="search_document")

    def embed_query(self, text: str) -> list[float]:
        # Use "search_query" for user questions
        return self._encode([text], prompt_name="search_query")[0]
```

pyproject.toml:

```toml
[project]
name = "gigachain-chat-qa-rag"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.13"
dependencies = [
    "chromadb>=1.0.20",
    "dotenv>=0.9.9",
    "ipywidgets>=8.1.7",
    "jupyter>=1.1.1",
    "langchain>=0.3.27",
    "langchain-chroma>=0.2.5",
    "langchain-community>=0.3.29",
    "langchain-gigachat>=0.3.12",
    "langchain-openai>=0.3.32",
    "langchain-redis>=0.2.3",
    "notebook>=7.4.5",
    "python-dotenv>=1.1.1",
    "sentence-transformers>=5.1.0",
    "torch>=2.8.0",
    "torchvision>=0.23.0",
    "transformers>=4.56.0",
]
```
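
With langchain-redis from the dependency list, the same embeddings can back a Redis vector store. This is a sketch, not a verified snippet: it assumes a Redis Stack instance (with the search module) is reachable at redis://localhost:6379, and the index name, URL, and texts are placeholders; the keyword arguments follow the typical langchain-redis API and should be checked against the installed version:

```python
# Hedged sketch: store the same kind of embeddings in Redis and query them.
from langchain_redis import RedisVectorStore

embeddings = RuEnRoSBERTaEmbeddings()

texts = [
    "Documents are embedded with the search_document prompt.",
    "Queries are embedded with the search_query prompt.",
]

vector_store = RedisVectorStore.from_texts(
    texts,
    embeddings,
    index_name="rosberta-demo",          # placeholder index name
    redis_url="redis://localhost:6379",  # adjust to your deployment
)

results = vector_store.similarity_search("Как индексируются документы?", k=1)
print(results[0].page_content)
```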