mahenzon · October 27, 2025 16:54
diff --git a/README.md b/README.md
diff --git a/embeddings_ru_en_rosberta.py b/embeddings_ru_en_rosberta.py
 import torch
 from langchain_core.embeddings import Embeddings
 from sentence_transformers import SentenceTransformer


 class RuEnRoSBERTaEmbeddings(Embeddings):
    """LangChain ``Embeddings`` adapter backed by a ``SentenceTransformer``.

    Documents are encoded with the ``search_document`` prompt and queries
    with the ``search_query`` prompt.
    """

    def __init__(
        self,
        model_name: str = "ai-forever/ru-en-RoSBERTa",
        device: str | None = None,
        use_prompt_name: bool = True,  # set to False if your sentence-transformers < 2.4.0
        normalize: bool = True,
        batch_size: int = 32,
    ):
        """Load the model and remember the encoding options.

        Args:
            model_name: Model identifier handed to ``SentenceTransformer``.
            device: Device to load the model on. When falsy, ``"cuda"`` is
                used if ``torch.cuda.is_available()`` and ``"cpu"`` otherwise.
            use_prompt_name: Pass ``prompt_name`` to ``encode``. When False,
                the textual prefix is prepended to every input instead.
            normalize: Ask ``encode`` for normalized embeddings.
            batch_size: Batch size forwarded to ``encode``.
        """
        self.use_prompt_name = use_prompt_name
        self.normalize = normalize
        self.batch_size = batch_size

        if not device:
            device = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = device
        self.model = SentenceTransformer(model_name, device=self.device)

    def _encode(self, texts: list[str], prompt_name: str) -> list[list[float]]:
        """Encode ``texts`` under the given prompt and return plain lists.

        Args:
            texts: Strings to embed.
            prompt_name: ``"search_query"`` or ``"search_document"``.

        Returns:
            The result of ``model.encode(...)`` converted with ``.tolist()``.

        Raises:
            KeyError: In the manual-prefix fallback, if ``prompt_name`` is
                not one of the two known names.
        """
        encode_options = dict(batch_size=self.batch_size, convert_to_numpy=True)
        if self.normalize:
            encode_options.update(normalize_embeddings=True)

        if self.use_prompt_name:
            # Requires sentence-transformers >= 2.4.0
            encode_options["prompt_name"] = prompt_name
            inputs = texts
        else:
            # Fallback: put the prompt text in front of every input ourselves.
            manual_prefixes = {
                "search_query": "search_query: ",
                "search_document": "search_document: ",
            }
            marker = manual_prefixes[prompt_name]
            inputs = [marker + item for item in texts]

        vectors = self.model.encode(inputs, **encode_options)
        return vectors.tolist()

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """Embed texts for indexing, using the ``search_document`` prompt."""
        return self._encode(texts, prompt_name="search_document")

    def embed_query(self, text: str) -> list[float]:
        """Embed one user question, using the ``search_query`` prompt."""
        query_vectors = self._encode([text], prompt_name="search_query")
        return query_vectors[0]
diff --git a/pyproject.toml b/pyproject.toml
 # Project metadata and runtime dependencies for the RAG Q&A notebooks.
 [project]
 name = "gigachain-chat-qa-rag"
 version = "0.1.0"
 # TODO: the description below is still placeholder text; replace it.
 description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.13"
 dependencies = [
    "chromadb>=1.0.20",
    # NOTE(review): "dotenv" and "python-dotenv" (further down) are both
    # listed; presumably only one is needed for `from dotenv import ...` - confirm.
    "dotenv>=0.9.9",
    "ipywidgets>=8.1.7",
    "jupyter>=1.1.1",
    "langchain>=0.3.27",
    "langchain-chroma>=0.2.5",
    "langchain-community>=0.3.29",
    "langchain-gigachat>=0.3.12",
    "langchain-openai>=0.3.32",
    "langchain-redis>=0.2.3",
    "notebook>=7.4.5",
    "python-dotenv>=1.1.1",
    "sentence-transformers>=5.1.0",
    "torch>=2.8.0",
    "torchvision>=0.23.0",
    "transformers>=4.56.0",
 ]
diff --git a/rag-qna-redis-vector.ipynb b/rag-qna-redis-vector.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "initial_id",
   "metadata": {
    "jupyter": {
     "is_executing": true
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from dotenv import find_dotenv, load_dotenv\n",
    "\n",
    "load_dotenv(find_dotenv())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "241c3f043c50f368",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-02T13:56:52.234697Z",
     "start_time": "2025-09-02T13:56:52.233342Z"
    }
   },
   "outputs": [],
   "source": [
    "import logging\n",
    "\n",
    "logging.getLogger(\"httpx\").setLevel(logging.WARNING)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "a68f1ba58ed7e1ea",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-02T13:56:52.831505Z",
     "start_time": "2025-09-02T13:56:52.236089Z"
    }
   },
   "outputs": [],
   "source": [
    "from langchain_openai import ChatOpenAI\n",
    "\n",
    "llm = ChatOpenAI()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "71f815eecb77bc6",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-02T13:56:54.842698Z",
     "start_time": "2025-09-02T13:56:52.832340Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'По легенде, на картине известного итальянского художника Тициана \"Экспозиция Откровенного Тела Иисуса (Из детища)\" Понтий Пилат изображен в длинном красном плаще с меховым воротником. Однако точно неи'"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from langchain.schema import HumanMessage\n",
    "\n",
    "question = \"Какой плащ был у Понтия Пилата?\"\n",
    "llm.invoke([HumanMessage(content=question)]).content[0:200]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "a1277e507bd675f5",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-02T13:56:54.863509Z",
     "start_time": "2025-09-02T13:56:54.843878Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total documents: 91\n"
     ]
    }
   ],
   "source": [
    "from langchain_community.document_loaders import TextLoader\n",
    "from langchain.text_splitter import (\n",
    "    RecursiveCharacterTextSplitter,\n",
    ")\n",
    "\n",
    "loader = TextLoader(\"./sample_data/мастер_и_маргарита.txt\")\n",
    "documents = loader.load()\n",
    "text_splitter = RecursiveCharacterTextSplitter(\n",
    "    chunk_size=1000,\n",
    "    chunk_overlap=200,\n",
    ")\n",
    "documents = text_splitter.split_documents(documents)\n",
    "print(f\"Total documents: {len(documents)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "9b18a0f34531feb0",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-02T13:57:01.102908Z",
     "start_time": "2025-09-02T13:56:54.865645Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some weights of RobertaModel were not initialized from the model checkpoint at ai-forever/ru-en-RoSBERTa and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
     ]
    }
   ],
   "source": [
    "from embeddings_ru_en_rosberta import RuEnRoSBERTaEmbeddings\n",
    "\n",
    "embeddings = RuEnRoSBERTaEmbeddings(\n",
    "    model_name=\"ai-forever/ru-en-RoSBERTa\",\n",
    "    use_prompt_name=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "5d8f49398c6d0fd0",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-02T13:57:16.600433Z",
     "start_time": "2025-09-02T13:57:01.103500Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
      "To disable this warning, you can either:\n",
      "\t- Avoid using `tokenizers` before the fork if possible\n",
      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "eb97ff4c1d3643d89d8655ad7f341764",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "17:07:56 redisvl.index.index INFO   Index already exists, not overwriting.\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "aa3b26134db447fba437d256a35ab161",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Batches:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from langchain_redis import RedisVectorStore, RedisConfig\n",
    "\n",
    "config = RedisConfig(\n",
    "    index_name=\"Master_and_Margarita\",\n",
    ")\n",
    "\n",
    "vector_store = RedisVectorStore.from_documents(\n",
    "    documents=documents,\n",
    "    embedding=embeddings,\n",
    "    config=config,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "9e8d728ad7df77da",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-02T13:57:16.722800Z",
     "start_time": "2025-09-02T13:57:16.601191Z"
    }
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a6eb0a3ec13d411b8d059c3feb7e0a36",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "docs = vector_store.similarity_search(question, k=3)\n",
    "len(docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "1339e3f8d207924d",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-02T13:57:16.725370Z",
     "start_time": "2025-09-02T13:57:16.723385Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "... ему-то пропал: – Все просто: в белом плаще...\n",
      "\n",
      "\n",
      "\n",
      "Глава 2\n",
      "\n",
      "Понтий Пилат\n",
      "\n",
      "В белом плаще с кровавым подбоем, шаркающей кавалерийской походкой, ранним утром четырнадцатого числа весенн ...\n"
     ]
    }
   ],
   "source": [
    "print(f\"... {str(docs[0])[620:800]} ...\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "fc507d167f3ba66f",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-02T13:57:18.512847Z",
     "start_time": "2025-09-02T13:57:16.726017Z"
    }
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "92faeb8c294c46d89459c4940890fcef",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "{'query': 'Какой плащ был у Понтия Пилата?',\n",
       " 'result': 'Понтий Пилат был одет в белый плащ с кровавым подбоем.'}"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from langchain.chains import RetrievalQA\n",
    "\n",
    "qa_chain = RetrievalQA.from_chain_type(\n",
    "    llm,\n",
    "    retriever=vector_store.as_retriever(), \n",
    ")\n",
    "\n",
    "qa_chain.invoke({\"query\": question})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "e62199724cd293ed",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-02T13:57:20.089560Z",
     "start_time": "2025-09-02T13:57:18.513966Z"
    }
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "acd8467b50cc40ba9dc51f57e1962a29",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "{'query': 'Какая трость была у Воланда?',\n",
       " 'result': 'У Воланда в руках была трость с черным набалдашником в виде головы пуделя.'}"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "qa_chain.invoke({\"query\": \"Какая трость была у Воланда?\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "c6450392da04e409",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-09-02T13:57:21.780655Z",
     "start_time": "2025-09-02T13:57:20.094606Z"
    }
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8cc070ce89e34d21afa8374a04601fd5",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "{'query': 'что не смогли купить герои романа на Патриарших?',\n",
       " 'result': 'Герои романа не смогли купить на Патриарших алкоголь, так как им сказали, что пиво будет привезено к вечеру, а нарзану, который запросил Берлиоз, не оказалось. Вместо этого им предложили абрикосовую, но она была теплой.'}"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "qa_chain.invoke({\"query\": \"что не смогли купить герои романа на Патриарших?\"})"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
diff --git a/rag-qna.ipynb b/rag-qna.ipynb
	import torch
	from langchain_core.embeddings import Embeddings
	from sentence_transformers import SentenceTransformer


	class RuEnRoSBERTaEmbeddings(Embeddings):
	    """LangChain ``Embeddings`` implementation on top of ``SentenceTransformer``.

	    ``embed_documents`` encodes with the ``search_document`` prompt and
	    ``embed_query`` with the ``search_query`` prompt.
	    """

	    def __init__(
	        self,
	        model_name: str = "ai-forever/ru-en-RoSBERTa",
	        device: str | None = None,
	        use_prompt_name: bool = True,  # set to False if your sentence-transformers < 2.4.0
	        normalize: bool = True,
	        batch_size: int = 32,
	    ):
	        """Load the model and store the encoding options.

	        Args:
	            model_name: Model identifier passed to ``SentenceTransformer``.
	            device: Target device; when falsy, ``"cuda"`` is chosen if
	                ``torch.cuda.is_available()``, otherwise ``"cpu"``.
	            use_prompt_name: Forward ``prompt_name`` to ``encode``; when
	                False the prefix text is prepended to each input instead.
	            normalize: Request normalized embeddings from ``encode``.
	            batch_size: Batch size forwarded to ``encode``.
	        """
	        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
	        self.model = SentenceTransformer(model_name, device=self.device)
	        self.use_prompt_name = use_prompt_name
	        self.normalize = normalize
	        self.batch_size = batch_size

	    def _encode(self, texts: list[str], prompt_name: str) -> list[list[float]]:
	        """Encode ``texts`` with the given prompt and return ``.tolist()`` output.

	        Raises:
	            KeyError: In the manual-prefix fallback, when ``prompt_name`` is
	                neither ``"search_query"`` nor ``"search_document"``.
	        """
	        kwargs = {
	            "batch_size": self.batch_size,
	            "convert_to_numpy": True,
	        }
	        if self.normalize:
	            kwargs["normalize_embeddings"] = True

	        if self.use_prompt_name:
	            # Requires sentence-transformers >= 2.4.0
	            kwargs["prompt_name"] = prompt_name
	            encoded = self.model.encode(texts, **kwargs)
	        else:
	            # Fallback: manually prepend prefixes
	            prefix = {
	                "search_query": "search_query: ",
	                "search_document": "search_document: ",
	            }[prompt_name]
	            prefixed = [prefix + t for t in texts]
	            encoded = self.model.encode(prefixed, **kwargs)

	        return encoded.tolist()

	    def embed_documents(self, texts: list[str]) -> list[list[float]]:
	        """Embed texts for indexing; uses the ``search_document`` prompt."""
	        return self._encode(texts, prompt_name="search_document")

	    def embed_query(self, text: str) -> list[float]:
	        """Embed a single user question; uses the ``search_query`` prompt."""
	        return self._encode([text], prompt_name="search_query")[0]
	# Project metadata and runtime dependencies for the RAG Q&A notebooks.
	[project]
	name = "gigachain-chat-qa-rag"
	version = "0.1.0"
	# TODO: the description below is still placeholder text; replace it.
	description = "Add your description here"
	readme = "README.md"
	requires-python = ">=3.13"
	dependencies = [
	"chromadb>=1.0.20",
	# NOTE(review): "dotenv" and "python-dotenv" (further down) are both
	# listed; presumably only one is needed for `from dotenv import ...` - confirm.
	"dotenv>=0.9.9",
	"ipywidgets>=8.1.7",
	"jupyter>=1.1.1",
	"langchain>=0.3.27",
	"langchain-chroma>=0.2.5",
	"langchain-community>=0.3.29",
	"langchain-gigachat>=0.3.12",
	"langchain-openai>=0.3.32",
	"langchain-redis>=0.2.3",
	"notebook>=7.4.5",
	"python-dotenv>=1.1.1",
	"sentence-transformers>=5.1.0",
	"torch>=2.8.0",
	"torchvision>=0.23.0",
	"transformers>=4.56.0",
	]
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"id": "initial_id",
	"metadata": {
	"jupyter": {
	"is_executing": true
	}
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"True"
	]
	},
	"execution_count": 1,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"from dotenv import find_dotenv, load_dotenv\n",
	"\n",
	"load_dotenv(find_dotenv())"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"id": "241c3f043c50f368",
	"metadata": {
	"ExecuteTime": {
	"end_time": "2025-09-02T13:56:52.234697Z",
	"start_time": "2025-09-02T13:56:52.233342Z"
	}
	},
	"outputs": [],
	"source": [
	"import logging\n",
	"\n",
	"logging.getLogger(\"httpx\").setLevel(logging.WARNING)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"id": "a68f1ba58ed7e1ea",
	"metadata": {
	"ExecuteTime": {
	"end_time": "2025-09-02T13:56:52.831505Z",
	"start_time": "2025-09-02T13:56:52.236089Z"
	}
	},
	"outputs": [],
	"source": [
	"from langchain_openai import ChatOpenAI\n",
	"\n",
	"llm = ChatOpenAI()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"id": "71f815eecb77bc6",
	"metadata": {
	"ExecuteTime": {
	"end_time": "2025-09-02T13:56:54.842698Z",
	"start_time": "2025-09-02T13:56:52.832340Z"
	}
	},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'По легенде, на картине известного итальянского художника Тициана \"Экспозиция Откровенного Тела Иисуса (Из детища)\" Понтий Пилат изображен в длинном красном плаще с меховым воротником. Однако точно неи'"
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"from langchain.schema import HumanMessage\n",
	"\n",
	"question = \"Какой плащ был у Понтия Пилата?\"\n",
	"llm.invoke([HumanMessage(content=question)]).content[0:200]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"id": "a1277e507bd675f5",
	"metadata": {
	"ExecuteTime": {
	"end_time": "2025-09-02T13:56:54.863509Z",
	"start_time": "2025-09-02T13:56:54.843878Z"
	}
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Total documents: 91\n"
	]
	}
	],
	"source": [
	"from langchain_community.document_loaders import TextLoader\n",
	"from langchain.text_splitter import (\n",
	" RecursiveCharacterTextSplitter,\n",
	")\n",
	"\n",
	"loader = TextLoader(\"./sample_data/мастер_и_маргарита.txt\")\n",
	"documents = loader.load()\n",
	"text_splitter = RecursiveCharacterTextSplitter(\n",
	" chunk_size=1000,\n",
	" chunk_overlap=200,\n",
	")\n",
	"documents = text_splitter.split_documents(documents)\n",
	"print(f\"Total documents: {len(documents)}\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"id": "9b18a0f34531feb0",
	"metadata": {
	"ExecuteTime": {
	"end_time": "2025-09-02T13:57:01.102908Z",
	"start_time": "2025-09-02T13:56:54.865645Z"
	}
	},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"Some weights of RobertaModel were not initialized from the model checkpoint at ai-forever/ru-en-RoSBERTa and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']\n",
	"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
	]
	}
	],
	"source": [
	"from embeddings_ru_en_rosberta import RuEnRoSBERTaEmbeddings\n",
	"\n",
	"embeddings = RuEnRoSBERTaEmbeddings(\n",
	" model_name=\"ai-forever/ru-en-RoSBERTa\",\n",
	" use_prompt_name=True,\n",
	")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"id": "5d8f49398c6d0fd0",
	"metadata": {
	"ExecuteTime": {
	"end_time": "2025-09-02T13:57:16.600433Z",
	"start_time": "2025-09-02T13:57:01.103500Z"
	}
	},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
	"To disable this warning, you can either:\n",
	"\t- Avoid using `tokenizers` before the fork if possible\n",
	"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
	]
	},
	{
	"data": {
	"application/vnd.jupyter.widget-view+json": {
	"model_id": "eb97ff4c1d3643d89d8655ad7f341764",
	"version_major": 2,
	"version_minor": 0
	},
	"text/plain": [
	"Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"17:07:56 redisvl.index.index INFO Index already exists, not overwriting.\n"
	]
	},
	{
	"data": {
	"application/vnd.jupyter.widget-view+json": {
	"model_id": "aa3b26134db447fba437d256a35ab161",
	"version_major": 2,
	"version_minor": 0
	},
	"text/plain": [
	"Batches:   0%|          | 0/3 [00:00<?, ?it/s]"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	}
	],
	"source": [
	"from langchain_redis import RedisVectorStore, RedisConfig\n",
	"\n",
	"config = RedisConfig(\n",
	" index_name=\"Master_and_Margarita\",\n",
	")\n",
	"\n",
	"vector_store = RedisVectorStore.from_documents(\n",
	" documents=documents,\n",
	" embedding=embeddings,\n",
	" config=config,\n",
	")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"id": "9e8d728ad7df77da",
	"metadata": {
	"ExecuteTime": {
	"end_time": "2025-09-02T13:57:16.722800Z",
	"start_time": "2025-09-02T13:57:16.601191Z"
	}
	},
	"outputs": [
	{
	"data": {
	"application/vnd.jupyter.widget-view+json": {
	"model_id": "a6eb0a3ec13d411b8d059c3feb7e0a36",
	"version_major": 2,
	"version_minor": 0
	},
	"text/plain": [
	"Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	},
	{
	"data": {
	"text/plain": [
	"3"
	]
	},
	"execution_count": 8,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"docs = vector_store.similarity_search(question, k=3)\n",
	"len(docs)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"id": "1339e3f8d207924d",
	"metadata": {
	"ExecuteTime": {
	"end_time": "2025-09-02T13:57:16.725370Z",
	"start_time": "2025-09-02T13:57:16.723385Z"
	}
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"... ему-то пропал: – Все просто: в белом плаще...\n",
	"\n",
	"\n",
	"\n",
	"Глава 2\n",
	"\n",
	"Понтий Пилат\n",
	"\n",
	"В белом плаще с кровавым подбоем, шаркающей кавалерийской походкой, ранним утром четырнадцатого числа весенн ...\n"
	]
	}
	],
	"source": [
	"print(f\"... {str(docs[0])[620:800]} ...\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"id": "fc507d167f3ba66f",
	"metadata": {
	"ExecuteTime": {
	"end_time": "2025-09-02T13:57:18.512847Z",
	"start_time": "2025-09-02T13:57:16.726017Z"
	}
	},
	"outputs": [
	{
	"data": {
	"application/vnd.jupyter.widget-view+json": {
	"model_id": "92faeb8c294c46d89459c4940890fcef",
	"version_major": 2,
	"version_minor": 0
	},
	"text/plain": [
	"Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	},
	{
	"data": {
	"text/plain": [
	"{'query': 'Какой плащ был у Понтия Пилата?',\n",
	" 'result': 'Понтий Пилат был одет в белый плащ с кровавым подбоем.'}"
	]
	},
	"execution_count": 10,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"from langchain.chains import RetrievalQA\n",
	"\n",
	"qa_chain = RetrievalQA.from_chain_type(\n",
	" llm,\n",
	" retriever=vector_store.as_retriever(), \n",
	")\n",
	"\n",
	"qa_chain.invoke({\"query\": question})"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"id": "e62199724cd293ed",
	"metadata": {
	"ExecuteTime": {
	"end_time": "2025-09-02T13:57:20.089560Z",
	"start_time": "2025-09-02T13:57:18.513966Z"
	}
	},
	"outputs": [
	{
	"data": {
	"application/vnd.jupyter.widget-view+json": {
	"model_id": "acd8467b50cc40ba9dc51f57e1962a29",
	"version_major": 2,
	"version_minor": 0
	},
	"text/plain": [
	"Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	},
	{
	"data": {
	"text/plain": [
	"{'query': 'Какая трость была у Воланда?',\n",
	" 'result': 'У Воланда в руках была трость с черным набалдашником в виде головы пуделя.'}"
	]
	},
	"execution_count": 11,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"qa_chain.invoke({\"query\": \"Какая трость была у Воланда?\"})"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"id": "c6450392da04e409",
	"metadata": {
	"ExecuteTime": {
	"end_time": "2025-09-02T13:57:21.780655Z",
	"start_time": "2025-09-02T13:57:20.094606Z"
	}
	},
	"outputs": [
	{
	"data": {
	"application/vnd.jupyter.widget-view+json": {
	"model_id": "8cc070ce89e34d21afa8374a04601fd5",
	"version_major": 2,
	"version_minor": 0
	},
	"text/plain": [
	"Batches:   0%|          | 0/1 [00:00<?, ?it/s]"
	]
	},
	"metadata": {},
	"output_type": "display_data"
	},
	{
	"data": {
	"text/plain": [
	"{'query': 'что не смогли купить герои романа на Патриарших?',\n",
	" 'result': 'Герои романа не смогли купить на Патриарших алкоголь, так как им сказали, что пиво будет привезено к вечеру, а нарзану, который запросил Берлиоз, не оказалось. Вместо этого им предложили абрикосовую, но она была теплой.'}"
	]
	},
	"execution_count": 12,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"qa_chain.invoke({\"query\": \"что не смогли купить герои романа на Патриарших?\"})"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3 (ipykernel)",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.13.2"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 5
	}