chuyqa · July 2, 2024 23:16
diff --git a/PG_Rag_Benchmarks.ipynb b/PG_Rag_Benchmarks.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "c5289586-0778-4203-a302-1782e8d4c173",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# !pip install pgvector asyncpg psycopg2-binary openai"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "88344404-5a4f-41f0-a37a-db3d523d7c74",
   "metadata": {},
   "source": [
    "### RAG - Use the raw psycopg2 and openai python bindings to ask questions about our data\n",
    "\n",
    "0. Takes a user query string\n",
    "1. Asks LLM to convert query string to a list 10 meaningful keywords to increase vector db surface area<br> `Calls: /v1/chat/completions`\n",
    "2. Converts 10 keywords to a vector of embeddings <br> `Calls: /v1/embeddings`\n",
    "3. Query PGVector for the closest vectors to those embeddings <br> `Queries: test.data_pihole_1_test_5`\n",
    "4. Passes the text associated to those vectors and the user query to an llm <br> `Calls: /v1/chat/completions`\n",
    "\n",
    "---"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "c5f3707f-f66e-475b-813b-60519747e332",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'TheBloke_WizardCoder-Python-34B-V1.0-GPTQ'"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 0.0 Set local openai and verify llm model\n",
    "import os\n",
    "os.environ['OPENAI_API_KEY']=\"sk-1\"\n",
    "os.environ['OPENAI_API_BASE']=\"http://playground:5001/v1\"\n",
    "import openai\n",
    "import logging\n",
    "import sys\n",
    "import psycopg2\n",
    "from pgvector.psycopg2 import register_vector\n",
    "\n",
    "logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p',level=logging.INFO)\n",
    "openai.Model.list()['data'][0]['id']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "01bf99f3-2638-483e-8b00-1aa67db0ff32",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define your query string\n",
    "query_str = \"What crypto exchanges are the users accessing?\"\n",
    "\n",
    "LLM_SYS_MSG={\n",
    "    \"role\": \"system\",\n",
    "    \"content\": \"You are a helpful assistant that analyzes network logs. Make your response as concise as possible, with no introduction or background at the start.\"\n",
    "    }"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "675c7f5f-7afb-443d-8177-b8751b779d96",
   "metadata": {},
   "source": [
    "## 1. Extract the meaning from the users question, and generate 10 relevant keywords to search on"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "fbfa0b1f-3f66-490c-9b7f-34ae8b115093",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "10/23/2023 07:36:19 AM 1. Querying openai.ChatCompletion to provide 10 keywords to use in similarity search.\n",
      "10/23/2023 07:36:19 AM \t 1. Model: TheBloke_WizardCoder-Python-34B-V1.0-GPTQ\n",
      "10/23/2023 07:36:19 AM \t 1. user prompt: What crypto exchanges are the users accessing?\n",
      "10/23/2023 07:36:29 AM \t 1. llm suggested 10 keywords:  ['1. Users', '2. Accessing', '3. Exchanges', '4. Databases', '5. Cryptocurrencies', '6. Trading', '7. Bitcoin', '8. Ethereum', '9. Wallets', '10. Addresses']\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 8.2 ms, sys: 938 µs, total: 9.14 ms\n",
      "Wall time: 10.4 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "logging.info(\"1. Querying openai.ChatCompletion to provide 10 keywords to use in similarity search.\")\n",
    "logging.info(\"\\t 1. Model: %s\",openai.Model.list()['data'][0]['id'])\n",
    "logging.info(\"\\t 1. user prompt: %s\",query_str)\n",
    "LLM_Q1_MSG ={ \n",
    "    \"role\": \"user\",\n",
    "    \"content\": f\"\"\"Provide only a list of 10 words or nouns that could be meaningful in a vector database query for the following prompt: {query_str}.\n",
    "    Do not add any context or text other than the list of 10 words.\"\"\"\n",
    "} \n",
    "LLM_R1_KEYWORDS = openai.ChatCompletion.create(\n",
    "    model=openai.Model.list()['data'][0]['id'],\n",
    "    messages=[\n",
    "        LLM_SYS_MSG,\n",
    "        LLM_Q1_MSG\n",
    "        ])\n",
    "keywords = LLM_R1_KEYWORDS['choices'][0]['message']['content'].strip().split('\\r\\n')\n",
    "logging.info(\"\\t 1. llm suggested 10 keywords:  %s\",keywords)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "f712890d-e2fd-4e9e-b70c-147bc66f14ca",
   "metadata": {},
   "source": [
    "## 2. Convert the keywords string to an embedding vector"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "48416427-c403-40d5-b9df-f8d701577e83",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "10/23/2023 07:36:34 AM 2. Calling openai.Embedding.create to convert keywords list to a 768 vector\n",
      "10/23/2023 07:36:34 AM \t 2. Done. len(vector_keywords): 768\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 5.45 ms, sys: 0 ns, total: 5.45 ms\n",
      "Wall time: 76.8 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "logging.info(\"2. Calling openai.Embedding.create to convert keywords list to a 768 vector\")\n",
    "keywords_str = ' '.join(keywords)\n",
    "LLM_Q2_EMB = openai.Embedding.create(\n",
    "    input=keywords_str,\n",
    "    model=\"text-embedding-ada-002\"\n",
    ")\n",
    "vector_keywords = LLM_Q2_EMB['data'][0]['embedding']\n",
    "logging.info(\"\\t 2. Done. len(vector_keywords): %s\",len(vector_keywords))"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "7991b09e-04b6-4761-9217-5c516b56d4c4",
   "metadata": {},
   "source": [
    "## 3. Query Postgres for the text, whose embedding vector is closest to the vector from 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "0619e429-255f-486b-923d-b57f55f91339",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "10/23/2023 07:36:35 AM 3. Querying pg.data_pihole_1_test_5 nearby vectors\n",
      "10/23/2023 07:36:36 AM \t 3. DB Version: PostgreSQL 15.4 (Debian 15.4-2.pgdg120+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 12.2.0-14) 12.2.0, 64-bit\n",
      "10/23/2023 07:36:36 AM \t 3. psycopg2 Version: 2.9.9 (dt dec pq3 ext lo64)\n",
      "10/23/2023 07:36:36 AM \t 3. Retrieved 10 rows. Concated to rows_str length 22769\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 7.27 ms, sys: 5.55 ms, total: 12.8 ms\n",
      "Wall time: 103 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "logging.info(\"3. Querying pg.data_pihole_1_test_5 nearby vectors\")\n",
    "# Connect to your PostgreSQL database\n",
    "conn = psycopg2.connect(database=\"test\", user=\"x\", password=\"x\", host=\"playground\", port=\"5432\")\n",
    "register_vector(conn)\n",
    "cur = conn.cursor()\n",
    "cur.execute(\"SELECT version();\")\n",
    "logging.info(\"\\t 3. DB Version: %s\",cur.fetchone()[0])\n",
    "logging.info(\"\\t 3. psycopg2 Version: %s\",psycopg2.__version__)\n",
    "cur.execute(\"SELECT text FROM data_pihole_1_test_5 ORDER BY embedding <->   %s::vector  LIMIT 10\",  # 1gpu max limit 2\n",
    "            (vector_keywords,))\n",
    "DB_RESP_Q3_TEXT = cur.fetchall()\n",
    "cur.close()\n",
    "conn.close()\n",
    "# Formatting the retrieved rows into a string for summarization\n",
    "DB_RESP_Q3_TEXT_STR = '\\n'.join([str(row) for row in DB_RESP_Q3_TEXT])\n",
    "logging.info(\"\\t 3. Retrieved %s rows. Concated to rows_str length %s\", len(DB_RESP_Q3_TEXT),len(DB_RESP_Q3_TEXT_STR))\n",
    "\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "fb98c79d-fd84-4fca-aeb7-4437a313ccb1",
   "metadata": {},
   "source": [
    "### 3.1 Benchmarks stats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "fe9ff383-a1b5-4ce6-b865-74d4f3fce479",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "10/23/2023 07:36:38 AM \t 3.1 Table size via pg_size_pretty: 1103 MB\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 4.25 ms, sys: 3.25 ms, total: 7.5 ms\n",
      "Wall time: 90.2 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "conn = psycopg2.connect(database=\"test\", user=\"x\", password=\"x\", host=\"playground\", port=\"5432\")\n",
    "cur = conn.cursor()\n",
    "cur.execute(\"SELECT pg_size_pretty(pg_total_relation_size('data_pihole_1_test_5'));\")\n",
    "logging.info( \"\\t 3.1 Table size via pg_size_pretty: %s\",cur.fetchone()[0])\n",
    "cur.close()\n",
    "conn.close()"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "2bb71cea-0d38-4bf4-acce-f3eaade23aa8",
   "metadata": {},
   "source": [
    "**Maximum podman stats usage while 3 is running**\n",
    "```\n",
    "ID            NAME        CPU %       MEM USAGE / LIMIT  MEM %       NET IO             BLOCK IO           PIDS        CPU TIME    AVG CPU %\n",
    "5f16c487d201  pg          0.15%       42.7MB / 134.4GB   0.03%       52.08MB / 361.9MB  572.1MB / 947.2kB  21          46.748944s  0.15%\n",
    "```"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "b6709807-06e2-4a87-9f4c-8e7557346a48",
   "metadata": {},
   "source": [
    "## 4. Pass as much context as your llm gpu supports and have fun."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "12a96c24-4537-45e8-b4c1-7efa8b8e4ea3",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "10/23/2023 07:36:41 AM 4. Passing in 22769 context to openai.ChatCompletion...\n",
      "10/23/2023 07:38:35 AM 5. Summary: Here's one way to parse and analyze the given log files to extract the requested information in a table format.\n",
      "<REDACTED FOR GIST>"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 10.2 ms, sys: 3.14 ms, total: 13.3 ms\n",
      "Wall time: 1min 54s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "logging.info(\"4. Passing in %s context to openai.ChatCompletion...\",len(DB_RESP_Q3_TEXT_STR))\n",
    "LLM_Q4_MSG={\n",
    "    \"role\": \"user\",\n",
    "    \"content\": f\"\"\"Given the following network logs: {DB_RESP_Q3_TEXT_STR}. \n",
    "    Can you summarize this data into a table format to answer the question: {query_str}\"\"\"\n",
    "}\n",
    "LLM_R4_SMRY = openai.ChatCompletion.create(\n",
    "    model=openai.Model.list()['data'][0]['id'],\n",
    "    messages=[\n",
    "        LLM_SYS_MSG,\n",
    "        LLM_Q4_MSG\n",
    "    ]\n",
    ")\n",
    "# Extracting the summary from the response\n",
    "summary = LLM_R4_SMRY['choices'][0]['message']['content'].strip()\n",
    "logging.info(f\"5. Summary: {summary}\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "413e6a82-009d-44e9-9a6c-41b0f7ef376d",
   "metadata": {},
   "source": [
    "### 4.1 LLM benchmark stats\n",
    "```\n",
    "- [23/Oct/2023 07:38:35] \"POST /v1/chat/completions HTTP/1.1\" 200 \n",
    "Output generated in 113.92 seconds (10.59 tokens/s, 1206 tokens, context 14233, seed 530975340)\n",
    "\n",
    "|=========================================+======================+======================|\n",
    "|   0  NVIDIA GeForce RTX 4060 Ti     Off | 00000000:04:00.0 Off |                  N/A |\n",
    "| 33%   55C    P2              88W / 165W |  14946MiB / 16380MiB |     73%      Default |\n",
    "|                                         |                      |                  N/A |\n",
    "+-----------------------------------------+----------------------+----------------------+\n",
    "|   1  NVIDIA GeForce RTX 4060 Ti     Off | 00000000:0A:00.0 Off |                  N/A |\n",
    "|  0%   57C    P2              55W / 165W |   7354MiB / 16380MiB |     33%      Default |\n",
    "|                                         |                      |                  N/A |\n",
    "+-----------------------------------------+----------------------+----------------------+\n",
    "```\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "818f88ec-355c-4ebd-9df4-27de3420fa19",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python (wizmath)",
   "language": "python",
   "name": "conda-env-wizmath-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"id": "c5289586-0778-4203-a302-1782e8d4c173",
	"metadata": {
	"scrolled": true
	},
	"outputs": [],
	"source": [
	"# !pip install pgvector asyncpg psycopg2-binary openai"
	]
	},
	{
	"attachments": {},
	"cell_type": "markdown",
	"id": "88344404-5a4f-41f0-a37a-db3d523d7c74",
	"metadata": {},
	"source": [
	"### RAG - Use the raw psycopg2 and openai python bindings to ask questions about our data\n",
	"\n",
	"0. Takes a user query string\n",
	"1. Asks LLM to convert query string to a list 10 meaningful keywords to increase vector db surface area<br> `Calls: /v1/chat/completions`\n",
	"2. Converts 10 keywords to a vector of embeddings <br> `Calls: /v1/embeddings`\n",
	"3. Query PGVector for the closest vectors to those embeddings <br> `Queries: test.data_pihole_1_test_5`\n",
	"4. Passes the text associated to those vectors and the user query to an llm <br> `Calls: /v1/chat/completions`\n",
	"\n",
	"---"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"id": "c5f3707f-f66e-475b-813b-60519747e332",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'TheBloke_WizardCoder-Python-34B-V1.0-GPTQ'"
	]
	},
	"execution_count": 1,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# 0.0 Set local openai and verify llm model\n",
	"import os\n",
	"os.environ['OPENAI_API_KEY']=\"sk-1\"\n",
	"os.environ['OPENAI_API_BASE']=\"http://playground:5001/v1\"\n",
	"import openai\n",
	"import logging\n",
	"import sys\n",
	"import psycopg2\n",
	"from pgvector.psycopg2 import register_vector\n",
	"\n",
	"logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p',level=logging.INFO)\n",
	"openai.Model.list()['data'][0]['id']"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"id": "01bf99f3-2638-483e-8b00-1aa67db0ff32",
	"metadata": {},
	"outputs": [],
	"source": [
	"# Define your query string\n",
	"query_str = \"What crypto exchanges are the users accessing?\"\n",
	"\n",
	"LLM_SYS_MSG={\n",
	" \"role\": \"system\",\n",
	" \"content\": \"You are a helpful assistant that analyzes network logs. Make your response as concise as possible, with no introduction or background at the start.\"\n",
	" }"
	]
	},
	{
	"attachments": {},
	"cell_type": "markdown",
	"id": "675c7f5f-7afb-443d-8177-b8751b779d96",
	"metadata": {},
	"source": [
	"## 1. Extract the meaning from the users question, and generate 10 relevant keywords to search on"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"id": "fbfa0b1f-3f66-490c-9b7f-34ae8b115093",
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"10/23/2023 07:36:19 AM 1. Querying openai.ChatCompletion to provide 10 keywords to use in similarity search.\n",
	"10/23/2023 07:36:19 AM \t 1. Model: TheBloke_WizardCoder-Python-34B-V1.0-GPTQ\n",
	"10/23/2023 07:36:19 AM \t 1. user prompt: What crypto exchanges are the users accessing?\n",
	"10/23/2023 07:36:29 AM \t 1. llm suggested 10 keywords: ['1. Users', '2. Accessing', '3. Exchanges', '4. Databases', '5. Cryptocurrencies', '6. Trading', '7. Bitcoin', '8. Ethereum', '9. Wallets', '10. Addresses']\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"CPU times: user 8.2 ms, sys: 938 µs, total: 9.14 ms\n",
	"Wall time: 10.4 s\n"
	]
	}
	],
	"source": [
	"%%time\n",
	"logging.info(\"1. Querying openai.ChatCompletion to provide 10 keywords to use in similarity search.\")\n",
	"logging.info(\"\\t 1. Model: %s\",openai.Model.list()['data'][0]['id'])\n",
	"logging.info(\"\\t 1. user prompt: %s\",query_str)\n",
	"LLM_Q1_MSG ={ \n",
	" \"role\": \"user\",\n",
	" \"content\": f\"\"\"Provide only a list of 10 words or nouns that could be meaningful in a vector database query for the following prompt: {query_str}.\n",
	" Do not add any context or text other than the list of 10 words.\"\"\"\n",
	"} \n",
	"LLM_R1_KEYWORDS = openai.ChatCompletion.create(\n",
	" model=openai.Model.list()['data'][0]['id'],\n",
	" messages=[\n",
	" LLM_SYS_MSG,\n",
	" LLM_Q1_MSG\n",
	" ])\n",
	"keywords = LLM_R1_KEYWORDS['choices'][0]['message']['content'].strip().split('\\r\\n')\n",
	"logging.info(\"\\t 1. llm suggested 10 keywords: %s\",keywords)"
	]
	},
	{
	"attachments": {},
	"cell_type": "markdown",
	"id": "f712890d-e2fd-4e9e-b70c-147bc66f14ca",
	"metadata": {},
	"source": [
	"## 2. Convert the keywords string to an embedding vector"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"id": "48416427-c403-40d5-b9df-f8d701577e83",
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"10/23/2023 07:36:34 AM 2. Calling openai.Embedding.create to convert keywords list to a 768 vector\n",
	"10/23/2023 07:36:34 AM \t 2. Done. len(vector_keywords): 768\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"CPU times: user 5.45 ms, sys: 0 ns, total: 5.45 ms\n",
	"Wall time: 76.8 ms\n"
	]
	}
	],
	"source": [
	"%%time\n",
	"logging.info(\"2. Calling openai.Embedding.create to convert keywords list to a 768 vector\")\n",
	"keywords_str = ' '.join(keywords)\n",
	"LLM_Q2_EMB = openai.Embedding.create(\n",
	" input=keywords_str,\n",
	" model=\"text-embedding-ada-002\"\n",
	")\n",
	"vector_keywords = LLM_Q2_EMB['data'][0]['embedding']\n",
	"logging.info(\"\\t 2. Done. len(vector_keywords): %s\",len(vector_keywords))"
	]
	},
	{
	"attachments": {},
	"cell_type": "markdown",
	"id": "7991b09e-04b6-4761-9217-5c516b56d4c4",
	"metadata": {},
	"source": [
	"## 3. Query Postgres for the text, whose embedding vector is closest to the vector from 2"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"id": "0619e429-255f-486b-923d-b57f55f91339",
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"10/23/2023 07:36:35 AM 3. Querying pg.data_pihole_1_test_5 nearby vectors\n",
	"10/23/2023 07:36:36 AM \t 3. DB Version: PostgreSQL 15.4 (Debian 15.4-2.pgdg120+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 12.2.0-14) 12.2.0, 64-bit\n",
	"10/23/2023 07:36:36 AM \t 3. psycopg2 Version: 2.9.9 (dt dec pq3 ext lo64)\n",
	"10/23/2023 07:36:36 AM \t 3. Retrieved 10 rows. Concated to rows_str length 22769\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"CPU times: user 7.27 ms, sys: 5.55 ms, total: 12.8 ms\n",
	"Wall time: 103 ms\n"
	]
	}
	],
	"source": [
	"%%time\n",
	"logging.info(\"3. Querying pg.data_pihole_1_test_5 nearby vectors\")\n",
	"# Connect to your PostgreSQL database\n",
	"conn = psycopg2.connect(database=\"test\", user=\"x\", password=\"x\", host=\"playground\", port=\"5432\")\n",
	"register_vector(conn)\n",
	"cur = conn.cursor()\n",
	"cur.execute(\"SELECT version();\")\n",
	"logging.info(\"\\t 3. DB Version: %s\",cur.fetchone()[0])\n",
	"logging.info(\"\\t 3. psycopg2 Version: %s\",psycopg2.__version__)\n",
	"cur.execute(\"SELECT text FROM data_pihole_1_test_5 ORDER BY embedding <-> %s::vector LIMIT 10\", # 1gpu max limit 2\n",
	" (vector_keywords,))\n",
	"DB_RESP_Q3_TEXT = cur.fetchall()\n",
	"cur.close()\n",
	"conn.close()\n",
	"# Formatting the retrieved rows into a string for summarization\n",
	"DB_RESP_Q3_TEXT_STR = '\\n'.join([str(row) for row in DB_RESP_Q3_TEXT])\n",
	"logging.info(\"\\t 3. Retrieved %s rows. Concated to rows_str length %s\", len(DB_RESP_Q3_TEXT),len(DB_RESP_Q3_TEXT_STR))\n",
	"\n"
	]
	},
	{
	"attachments": {},
	"cell_type": "markdown",
	"id": "fb98c79d-fd84-4fca-aeb7-4437a313ccb1",
	"metadata": {},
	"source": [
	"### 3.1 Benchmarks stats"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"id": "fe9ff383-a1b5-4ce6-b865-74d4f3fce479",
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"10/23/2023 07:36:38 AM \t 3.1 Table size via pg_size_pretty: 1103 MB\n"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"CPU times: user 4.25 ms, sys: 3.25 ms, total: 7.5 ms\n",
	"Wall time: 90.2 ms\n"
	]
	}
	],
	"source": [
	"%%time\n",
	"conn = psycopg2.connect(database=\"test\", user=\"x\", password=\"x\", host=\"playground\", port=\"5432\")\n",
	"cur = conn.cursor()\n",
	"cur.execute(\"SELECT pg_size_pretty(pg_total_relation_size('data_pihole_1_test_5'));\")\n",
	"logging.info( \"\\t 3.1 Table size via pg_size_pretty: %s\",cur.fetchone()[0])\n",
	"cur.close()\n",
	"conn.close()"
	]
	},
	{
	"attachments": {},
	"cell_type": "markdown",
	"id": "2bb71cea-0d38-4bf4-acce-f3eaade23aa8",
	"metadata": {},
	"source": [
	"Maximum podman stats usage while 3 is running\n",
	"```\n",
	"ID NAME CPU % MEM USAGE / LIMIT MEM % NET IO BLOCK IO PIDS CPU TIME AVG CPU %\n",
	"5f16c487d201 pg 0.15% 42.7MB / 134.4GB 0.03% 52.08MB / 361.9MB 572.1MB / 947.2kB 21 46.748944s 0.15%\n",
	"```"
	]
	},
	{
	"attachments": {},
	"cell_type": "markdown",
	"id": "b6709807-06e2-4a87-9f4c-8e7557346a48",
	"metadata": {},
	"source": [
	"## 4. Pass as much context as your llm gpu supports and have fun."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"id": "12a96c24-4537-45e8-b4c1-7efa8b8e4ea3",
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"10/23/2023 07:36:41 AM 4. Passing in 22769 context to openai.ChatCompletion...\n",
	"10/23/2023 07:38:35 AM 5. Summary: Here's one way to parse and analyze the given log files to extract the requested information in a table format.\n",
	"<REDACTED FOR GIST>"
	]
	},
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"CPU times: user 10.2 ms, sys: 3.14 ms, total: 13.3 ms\n",
	"Wall time: 1min 54s\n"
	]
	}
	],
	"source": [
	"%%time\n",
	"logging.info(\"4. Passing in %s context to openai.ChatCompletion...\",len(DB_RESP_Q3_TEXT_STR))\n",
	"LLM_Q4_MSG={\n",
	" \"role\": \"user\",\n",
	" \"content\": f\"\"\"Given the following network logs: {DB_RESP_Q3_TEXT_STR}. \n",
	" Can you summarize this data into a table format to answer the question: {query_str}\"\"\"\n",
	"}\n",
	"LLM_R4_SMRY = openai.ChatCompletion.create(\n",
	" model=openai.Model.list()['data'][0]['id'],\n",
	" messages=[\n",
	" LLM_SYS_MSG,\n",
	" LLM_Q4_MSG\n",
	" ]\n",
	")\n",
	"# Extracting the summary from the response\n",
	"summary = LLM_R4_SMRY['choices'][0]['message']['content'].strip()\n",
	"logging.info(f\"5. Summary: {summary}\")"
	]
	},
	{
	"attachments": {},
	"cell_type": "markdown",
	"id": "413e6a82-009d-44e9-9a6c-41b0f7ef376d",
	"metadata": {},
	"source": [
	"### 4.1 LLM benchmark stats\n",
	"```\n",
	"- [23/Oct/2023 07:38:35] \"POST /v1/chat/completions HTTP/1.1\" 200 \n",
	"Output generated in 113.92 seconds (10.59 tokens/s, 1206 tokens, context 14233, seed 530975340)\n",
	"\n",
	"\|=========================================+======================+======================\|\n",
	"\| 0 NVIDIA GeForce RTX 4060 Ti Off \| 00000000:04:00.0 Off \| N/A \|\n",
	"\| 33% 55C P2 88W / 165W \| 14946MiB / 16380MiB \| 73% Default \|\n",
	"\| \| \| N/A \|\n",
	"+-----------------------------------------+----------------------+----------------------+\n",
	"\| 1 NVIDIA GeForce RTX 4060 Ti Off \| 00000000:0A:00.0 Off \| N/A \|\n",
	"\| 0% 57C P2 55W / 165W \| 7354MiB / 16380MiB \| 33% Default \|\n",
	"\| \| \| N/A \|\n",
	"+-----------------------------------------+----------------------+----------------------+\n",
	"```\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "818f88ec-355c-4ebd-9df4-27de3420fa19",
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python (wizmath)",
	"language": "python",
	"name": "conda-env-wizmath-py"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.10.13"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 5
	}