Created
June 15, 2023 11:47
-
-
Save hsm207/99fa2c0edee75826f277dbba8e604ea7 to your computer and use it in GitHub Desktop.
How to connect and reconnect to a Weaviate vector store in LangChain
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Started /home/vscode/.cache/weaviate-embedded: process ID 4863\n" | |
] | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"{\"action\":\"startup\",\"default_vectorizer_module\":\"none\",\"level\":\"info\",\"msg\":\"the default vectorizer modules is set to \\\"none\\\", as a result all new schema classes without an explicit vectorizer setting, will use this vectorizer\",\"time\":\"2023-06-15T11:46:08Z\"}\n", | |
"{\"action\":\"startup\",\"auto_schema_enabled\":true,\"level\":\"info\",\"msg\":\"auto schema enabled setting is set to \\\"true\\\"\",\"time\":\"2023-06-15T11:46:08Z\"}\n", | |
"{\"action\":\"hnsw_vector_cache_prefill\",\"count\":3000,\"index_id\":\"karpathy_gpt_ZT24y9VwtKZf\",\"level\":\"info\",\"limit\":1000000000000,\"msg\":\"prefilled vector cache\",\"time\":\"2023-06-15T11:46:08Z\",\"took\":193942}\n", | |
"{\"level\":\"warning\",\"msg\":\"Multiple vector spaces are present, GraphQL Explore and REST API list objects endpoint module include params has been disabled as a result.\",\"time\":\"2023-06-15T11:46:08Z\"}\n", | |
"{\"action\":\"grpc_startup\",\"level\":\"info\",\"msg\":\"grpc server listening at [::]:50051\",\"time\":\"2023-06-15T11:46:08Z\"}\n", | |
"{\"action\":\"restapi_management\",\"level\":\"info\",\"msg\":\"Serving weaviate at http://127.0.0.1:6666\",\"time\":\"2023-06-15T11:46:08Z\"}\n", | |
"/usr/local/lib/python3.10/subprocess.py:1072: ResourceWarning: subprocess 4863 is still running\n", | |
" _warn(\"subprocess %s is still running\" % self.pid,\n", | |
"ResourceWarning: Enable tracemalloc to get the object allocation traceback\n" | |
] | |
} | |
], | |
"source": [ | |
"import weaviate\n", | |
"\n", | |
"client = weaviate.Client(embedded_options=weaviate.embedded.EmbeddedOptions())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"splits = [\n", | |
" \"Artificial Intelligence (AI) is revolutionizing the way we interact with technology in our daily lives.\",\n", | |
" \"From autonomous driving to personalized recommendations on streaming platforms, AI applications are becoming ubiquitous.\",\n", | |
" \"With advances in machine learning and deep learning, AI systems are becoming increasingly capable and intelligent.\",\n", | |
" \"However, the rise of AI also raises important ethical questions about data privacy and algorithmic bias.\",\n", | |
" \"As AI continues to evolve, it's essential to prioritize transparency and fairness in the design and implementation of AI systems.\",\n", | |
"]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from langchain.vectorstores import Weaviate\n", | |
"from langchain.embeddings.openai import OpenAIEmbeddings\n", | |
"\n", | |
"index_name = \"Karpathy_gpt\"\n", | |
"embeddings = OpenAIEmbeddings()\n", | |
"vectorstore_new = Weaviate.from_texts(\n", | |
" splits, embeddings, client=client, index_name=index_name, text_key=\"text\"\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[Document(page_content=\"As AI continues to evolve, it's essential to prioritize transparency and fairness in the design and implementation of AI systems.\", metadata={})]" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"query = \"What to prioritise?\"\n", | |
"matched_docs = vectorstore_new.similarity_search(query, k=1)\n", | |
"matched_docs" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"vectorstore_weviate = Weaviate(\n", | |
" client=client,\n", | |
" index_name=index_name,\n", | |
" text_key=\"text\",\n", | |
" by_text=False,\n", | |
" embedding=embeddings,\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[Document(page_content=\"As AI continues to evolve, it's essential to prioritize transparency and fairness in the design and implementation of AI systems.\", metadata={})]" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"query = \"What to prioritise?\"\n", | |
"matched_docs = vectorstore_weviate.similarity_search(query, k=1)\n", | |
"matched_docs" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": ".venv", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.12" | |
}, | |
"orig_nbformat": 4 | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment