Last active
July 2, 2024 23:16
-
-
Save chuyqa/e122155055c1e74fbdc0a47f0d5e9c72 to your computer and use it in GitHub Desktop.
PGVector Local LLM
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "c5289586-0778-4203-a302-1782e8d4c173", | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# !pip install pgvector asyncpg psycopg2-binary openai" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"id": "88344404-5a4f-41f0-a37a-db3d523d7c74", | |
"metadata": {}, | |
"source": [ | |
"### RAG - Use the raw psycopg2 and openai python bindings to ask questions about our data\n", | |
"\n", | |
"0. Takes a user query string\n", | |
"1. Asks LLM to convert query string to a list 10 meaningful keywords to increase vector db surface area<br> `Calls: /v1/chat/completions`\n", | |
"2. Converts 10 keywords to a vector of embeddings <br> `Calls: /v1/embeddings`\n", | |
"3. Query PGVector for the closest vectors to those embeddings <br> `Queries: test.data_pihole_1_test_5`\n", | |
"4. Passes the text associated to those vectors and the user query to an llm <br> `Calls: /v1/chat/completions`\n", | |
"\n", | |
"---" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "c5f3707f-f66e-475b-813b-60519747e332", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'TheBloke_WizardCoder-Python-34B-V1.0-GPTQ'" | |
] | |
}, | |
"execution_count": 1, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# 0.0 Set local openai and verify llm model\n", | |
"import os\n", | |
"os.environ['OPENAI_API_KEY']=\"sk-1\"\n", | |
"os.environ['OPENAI_API_BASE']=\"http://playground:5001/v1\"\n", | |
"import openai\n", | |
"import logging\n", | |
"import sys\n", | |
"import psycopg2\n", | |
"from pgvector.psycopg2 import register_vector\n", | |
"\n", | |
"logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p',level=logging.INFO)\n", | |
"openai.Model.list()['data'][0]['id']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "01bf99f3-2638-483e-8b00-1aa67db0ff32", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Define your query string\n", | |
"query_str = \"What crypto exchanges are the users accessing?\"\n", | |
"\n", | |
"LLM_SYS_MSG={\n", | |
" \"role\": \"system\",\n", | |
" \"content\": \"You are a helpful assistant that analyzes network logs. Make your response as concise as possible, with no introduction or background at the start.\"\n", | |
" }" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"id": "675c7f5f-7afb-443d-8177-b8751b779d96", | |
"metadata": {}, | |
"source": [ | |
"## 1. Extract the meaning from the users question, and generate 10 relevant keywords to search on" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "fbfa0b1f-3f66-490c-9b7f-34ae8b115093", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"10/23/2023 07:36:19 AM 1. Querying openai.ChatCompletion to provide 10 keywords to use in similarity search.\n", | |
"10/23/2023 07:36:19 AM \t 1. Model: TheBloke_WizardCoder-Python-34B-V1.0-GPTQ\n", | |
"10/23/2023 07:36:19 AM \t 1. user prompt: What crypto exchanges are the users accessing?\n", | |
"10/23/2023 07:36:29 AM \t 1. llm suggested 10 keywords: ['1. Users', '2. Accessing', '3. Exchanges', '4. Databases', '5. Cryptocurrencies', '6. Trading', '7. Bitcoin', '8. Ethereum', '9. Wallets', '10. Addresses']\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 8.2 ms, sys: 938 µs, total: 9.14 ms\n", | |
"Wall time: 10.4 s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"logging.info(\"1. Querying openai.ChatCompletion to provide 10 keywords to use in similarity search.\")\n", | |
"logging.info(\"\\t 1. Model: %s\",openai.Model.list()['data'][0]['id'])\n", | |
"logging.info(\"\\t 1. user prompt: %s\",query_str)\n", | |
"LLM_Q1_MSG ={ \n", | |
" \"role\": \"user\",\n", | |
" \"content\": f\"\"\"Provide only a list of 10 words or nouns that could be meaningful in a vector database query for the following prompt: {query_str}.\n", | |
" Do not add any context or text other than the list of 10 words.\"\"\"\n", | |
"} \n", | |
"LLM_R1_KEYWORDS = openai.ChatCompletion.create(\n", | |
" model=openai.Model.list()['data'][0]['id'],\n", | |
" messages=[\n", | |
" LLM_SYS_MSG,\n", | |
" LLM_Q1_MSG\n", | |
" ])\n", | |
"keywords = LLM_R1_KEYWORDS['choices'][0]['message']['content'].strip().split('\\r\\n')\n", | |
"logging.info(\"\\t 1. llm suggested 10 keywords: %s\",keywords)" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"id": "f712890d-e2fd-4e9e-b70c-147bc66f14ca", | |
"metadata": {}, | |
"source": [ | |
"## 2. Convert the keywords string to an embedding vector" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "48416427-c403-40d5-b9df-f8d701577e83", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"10/23/2023 07:36:34 AM 2. Calling openai.Embedding.create to convert keywords list to a 768 vector\n", | |
"10/23/2023 07:36:34 AM \t 2. Done. len(vector_keywords): 768\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 5.45 ms, sys: 0 ns, total: 5.45 ms\n", | |
"Wall time: 76.8 ms\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"logging.info(\"2. Calling openai.Embedding.create to convert keywords list to a 768 vector\")\n", | |
"keywords_str = ' '.join(keywords)\n", | |
"LLM_Q2_EMB = openai.Embedding.create(\n", | |
" input=keywords_str,\n", | |
" model=\"text-embedding-ada-002\"\n", | |
")\n", | |
"vector_keywords = LLM_Q2_EMB['data'][0]['embedding']\n", | |
"logging.info(\"\\t 2. Done. len(vector_keywords): %s\",len(vector_keywords))" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"id": "7991b09e-04b6-4761-9217-5c516b56d4c4", | |
"metadata": {}, | |
"source": [ | |
"## 3. Query Postgres for the text, whose embedding vector is closest to the vector from 2" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "0619e429-255f-486b-923d-b57f55f91339", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"10/23/2023 07:36:35 AM 3. Querying pg.data_pihole_1_test_5 nearby vectors\n", | |
"10/23/2023 07:36:36 AM \t 3. DB Version: PostgreSQL 15.4 (Debian 15.4-2.pgdg120+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 12.2.0-14) 12.2.0, 64-bit\n", | |
"10/23/2023 07:36:36 AM \t 3. psycopg2 Version: 2.9.9 (dt dec pq3 ext lo64)\n", | |
"10/23/2023 07:36:36 AM \t 3. Retrieved 10 rows. Concated to rows_str length 22769\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 7.27 ms, sys: 5.55 ms, total: 12.8 ms\n", | |
"Wall time: 103 ms\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"logging.info(\"3. Querying pg.data_pihole_1_test_5 nearby vectors\")\n", | |
"# Connect to your PostgreSQL database\n", | |
"conn = psycopg2.connect(database=\"test\", user=\"x\", password=\"x\", host=\"playground\", port=\"5432\")\n", | |
"register_vector(conn)\n", | |
"cur = conn.cursor()\n", | |
"cur.execute(\"SELECT version();\")\n", | |
"logging.info(\"\\t 3. DB Version: %s\",cur.fetchone()[0])\n", | |
"logging.info(\"\\t 3. psycopg2 Version: %s\",psycopg2.__version__)\n", | |
"cur.execute(\"SELECT text FROM data_pihole_1_test_5 ORDER BY embedding <-> %s::vector LIMIT 10\", # 1gpu max limit 2\n", | |
" (vector_keywords,))\n", | |
"DB_RESP_Q3_TEXT = cur.fetchall()\n", | |
"cur.close()\n", | |
"conn.close()\n", | |
"# Formatting the retrieved rows into a string for summarization\n", | |
"DB_RESP_Q3_TEXT_STR = '\\n'.join([str(row) for row in DB_RESP_Q3_TEXT])\n", | |
"logging.info(\"\\t 3. Retrieved %s rows. Concated to rows_str length %s\", len(DB_RESP_Q3_TEXT),len(DB_RESP_Q3_TEXT_STR))\n", | |
"\n" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"id": "fb98c79d-fd84-4fca-aeb7-4437a313ccb1", | |
"metadata": {}, | |
"source": [ | |
"### 3.1 Benchmarks stats" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "fe9ff383-a1b5-4ce6-b865-74d4f3fce479", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"10/23/2023 07:36:38 AM \t 3.1 Table size via pg_size_pretty: 1103 MB\n" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 4.25 ms, sys: 3.25 ms, total: 7.5 ms\n", | |
"Wall time: 90.2 ms\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"conn = psycopg2.connect(database=\"test\", user=\"x\", password=\"x\", host=\"playground\", port=\"5432\")\n", | |
"cur = conn.cursor()\n", | |
"cur.execute(\"SELECT pg_size_pretty(pg_total_relation_size('data_pihole_1_test_5'));\")\n", | |
"logging.info( \"\\t 3.1 Table size via pg_size_pretty: %s\",cur.fetchone()[0])\n", | |
"cur.close()\n", | |
"conn.close()" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"id": "2bb71cea-0d38-4bf4-acce-f3eaade23aa8", | |
"metadata": {}, | |
"source": [ | |
"**Maximum podman stats usage while 3 is running**\n", | |
"```\n", | |
"ID NAME CPU % MEM USAGE / LIMIT MEM % NET IO BLOCK IO PIDS CPU TIME AVG CPU %\n", | |
"5f16c487d201 pg 0.15% 42.7MB / 134.4GB 0.03% 52.08MB / 361.9MB 572.1MB / 947.2kB 21 46.748944s 0.15%\n", | |
"```" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"id": "b6709807-06e2-4a87-9f4c-8e7557346a48", | |
"metadata": {}, | |
"source": [ | |
"## 4. Pass as much context as your llm gpu supports and have fun." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "12a96c24-4537-45e8-b4c1-7efa8b8e4ea3", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"10/23/2023 07:36:41 AM 4. Passing in 22769 context to openai.ChatCompletion...\n", | |
"10/23/2023 07:38:35 AM 5. Summary: Here's one way to parse and analyze the given log files to extract the requested information in a table format.\n", | |
"<REDACTED FOR GIST>" | |
] | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"CPU times: user 10.2 ms, sys: 3.14 ms, total: 13.3 ms\n", | |
"Wall time: 1min 54s\n" | |
] | |
} | |
], | |
"source": [ | |
"%%time\n", | |
"logging.info(\"4. Passing in %s context to openai.ChatCompletion...\",len(DB_RESP_Q3_TEXT_STR))\n", | |
"LLM_Q4_MSG={\n", | |
" \"role\": \"user\",\n", | |
" \"content\": f\"\"\"Given the following network logs: {DB_RESP_Q3_TEXT_STR}. \n", | |
" Can you summarize this data into a table format to answer the question: {query_str}\"\"\"\n", | |
"}\n", | |
"LLM_R4_SMRY = openai.ChatCompletion.create(\n", | |
" model=openai.Model.list()['data'][0]['id'],\n", | |
" messages=[\n", | |
" LLM_SYS_MSG,\n", | |
" LLM_Q4_MSG\n", | |
" ]\n", | |
")\n", | |
"# Extracting the summary from the response\n", | |
"summary = LLM_R4_SMRY['choices'][0]['message']['content'].strip()\n", | |
"logging.info(f\"5. Summary: {summary}\")" | |
] | |
}, | |
{ | |
"attachments": {}, | |
"cell_type": "markdown", | |
"id": "413e6a82-009d-44e9-9a6c-41b0f7ef376d", | |
"metadata": {}, | |
"source": [ | |
"### 4.1 LLM benchmark stats\n", | |
"```\n", | |
"- [23/Oct/2023 07:38:35] \"POST /v1/chat/completions HTTP/1.1\" 200 \n", | |
"Output generated in 113.92 seconds (10.59 tokens/s, 1206 tokens, context 14233, seed 530975340)\n", | |
"\n", | |
"|=========================================+======================+======================|\n", | |
"| 0 NVIDIA GeForce RTX 4060 Ti Off | 00000000:04:00.0 Off | N/A |\n", | |
"| 33% 55C P2 88W / 165W | 14946MiB / 16380MiB | 73% Default |\n", | |
"| | | N/A |\n", | |
"+-----------------------------------------+----------------------+----------------------+\n", | |
"| 1 NVIDIA GeForce RTX 4060 Ti Off | 00000000:0A:00.0 Off | N/A |\n", | |
"| 0% 57C P2 55W / 165W | 7354MiB / 16380MiB | 33% Default |\n", | |
"| | | N/A |\n", | |
"+-----------------------------------------+----------------------+----------------------+\n", | |
"```\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "818f88ec-355c-4ebd-9df4-27de3420fa19", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python (wizmath)", | |
"language": "python", | |
"name": "conda-env-wizmath-py" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.10.13" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment