Created
September 23, 2024 15:38
-
-
Save tspannhw/cdcdbc3988de169eaac88f467d4d27a0 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "id": "b3604e62-911f-4f91-bc78-bbe201cc92e3", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "/Users/timothyspann/Downloads/code/milvusvenv/lib/python3.12/site-packages/sentence_transformers/cross_encoder/CrossEncoder.py:11: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n", | |
| " from tqdm.autonotebook import tqdm, trange\n", | |
| "/Users/timothyspann/Downloads/code/milvusvenv/lib/python3.12/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n", | |
| " warnings.warn(\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "import os\n", | |
| "from pymilvus import MilvusClient\n", | |
| "from langchain_community.document_loaders import PyPDFLoader\n", | |
| "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", | |
| "from langchain.chains import RetrievalQA\n", | |
| "from langchain_community.embeddings import HuggingFaceEmbeddings\n", | |
| "from langchain.callbacks.manager import CallbackManager\n", | |
| "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", | |
| "from langchain_community.llms import Ollama\n", | |
| "from langchain import hub\n", | |
| "from langchain_huggingface import HuggingFaceEmbeddings\n", | |
| "from langchain_community.vectorstores import Milvus\n", | |
| "from langchain import hub\n", | |
| "from langchain.chains import RetrievalQA\n", | |
| "from langchain_community.llms import Ollama\n", | |
| "from langchain.callbacks.manager import CallbackManager\n", | |
| "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", | |
| "import requests\n", | |
| "import sys\n", | |
| "import io\n", | |
| "import json\n", | |
| "import shutil\n", | |
| "import sys\n", | |
| "import datetime\n", | |
| "import subprocess\n", | |
| "import math\n", | |
| "import base64\n", | |
| "from time import gmtime, strftime\n", | |
| "import random, string\n", | |
| "import time\n", | |
| "import base64\n", | |
| "import uuid\n", | |
| "import socket\n", | |
| "from pymilvus import connections\n", | |
| "from pymilvus import utility\n", | |
| "from pymilvus import FieldSchema, CollectionSchema, DataType, Collection\n", | |
| "from pymilvus import MilvusClient\n", | |
| "from slack_sdk import WebClient\n", | |
| "from slack_sdk.errors import SlackApiError\n", | |
| "from dotenv import load_dotenv\n", | |
| "load_dotenv(verbose=True)\n", | |
| "from datetime import datetime, timedelta\n", | |
| "from langchain_milvus import Milvus\n", | |
| "\n", | |
| "DIMENSION = 384 \n", | |
| "TOKEN = os.environ.get(\"ZILLIZ_TOKEN\")\n", | |
| "MILVUS_URL = os.environ.get(\"SERVERLESS_VECTORDB_URI\")\n", | |
| "\n", | |
| "slack_token = os.environ.get(\"SLACK_BOT_TOKEN\")\n", | |
| "client = WebClient(token=slack_token)\n", | |
| "embeddings = HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n", | |
| "\n", | |
| "COLLECTION_NAME = \"liveplanes\"\n", | |
| "TODAYS_DATE = str( datetime.today().strftime('%Y-%m-%d') ) \n", | |
| "YESTERDAYS_DATE = (datetime.now() - timedelta(1)).strftime('%Y-%m-%d')\n", | |
| "\n", | |
| "os.environ[\"LANGCHAIN_HUB_API_URL\"] = \"https://api.hub.langchain.com\"\n", | |
| "os.environ[\"LANGCHAIN_HUB_API_KEY\"] = os.environ.get(\"LANGCHAIN_HUB_API_KEY\")\n", | |
| "os.environ[\"LANGCHAIN_API_KEY\"] = os.environ.get(\"LANGCHAIN_HUB_API_KEY\")\n", | |
| "os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", | |
| "os.environ[\"LANGCHAIN_ENDPOINT\"] = \"https://api.hub.langchain.com\"\n", | |
| "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"true\"\n", | |
| "\n", | |
| "### Turn off slack warnings\n", | |
| "os.environ[\"SKIP_SLACK_SDK_WARNING\"] = \"false\"\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "id": "eec675b6-3bfb-47de-8062-c4a27ba47610", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# -----------------------------------------------------------------------------\n", | |
| "# Connect to Milvus\n", | |
| "\n", | |
| "vector_store = Milvus(\n", | |
| " embedding_function=embeddings,\n", | |
| " collection_name=COLLECTION_NAME,\n", | |
| " primary_field = \"id\",\n", | |
| " text_field=\"details\",\n", | |
| " vector_field = \"plane_text_vector\",\n", | |
| " connection_args={\"uri\": MILVUS_URL, \"token\": TOKEN},\n", | |
| ")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "id": "5c5e4689-d0ea-42a5-b489-8d5ca90eae99", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def run_query() -> None:\n", | |
| " llm = Ollama(\n", | |
| " model=\"llama3.1\",\n", | |
| " callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),\n", | |
| " stop=[\"<|eot_id|>\"],\n", | |
| " )\n", | |
| "\n", | |
| " query = input(\"\\nQuery: \")\n", | |
| " qa_chain = RetrievalQA.from_chain_type(\n", | |
| " llm, retriever=vector_store.as_retriever(collection = COLLECTION_NAME))\n", | |
| "\n", | |
| "\n", | |
| " # , expr=\"parameter == 'o3'\"\n", | |
| " result = qa_chain.invoke({\"query\": query})\n", | |
| " \n", | |
| " resultforslack = str(result[\"result\"])\n", | |
| " print(resultforslack)\n", | |
| "\n", | |
| " try:\n", | |
| " response = client.chat_postMessage(mrkdwn=True, channel=\"C06NE1FU6SE\", text=\"\", \n", | |
| " blocks=[{\"type\": \"section\",\"text\": {\"type\": \"mrkdwn\",\"text\": \"*\" + str(query) + \"* \\n\\n\" + str(resultforslack) +\"\\n\" }}])\n", | |
| "\n", | |
| " except SlackApiError as e:\n", | |
| " # You will get a SlackApiError if \"ok\" is False\n", | |
| " print(\"Slack failed\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "b2791e65-8cf4-4387-8ced-5ca83b35b621", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "/var/folders/c2/6wq9klzn19d97gvw8f8j1jdr0000gn/T/ipykernel_82902/3689434376.py:2: DeprecationWarning: callback_manager is deprecated. Please use callbacks instead.\n", | |
| " llm = Ollama(\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdin", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Query: What are five Boeing from American Airlines flights?\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "I don't know. The provided context only mentions two flights of American Airlines, AAL1056 and one other flight (NA) but the aircraft type is not mentioned, it could be a Boeing or another manufacturer.I don't know. The provided context only mentions two flights of American Airlines, AAL1056 and one other flight (NA) but the aircraft type is not mentioned, it could be a Boeing or another manufacturer.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "/var/folders/c2/6wq9klzn19d97gvw8f8j1jdr0000gn/T/ipykernel_82902/3689434376.py:2: DeprecationWarning: callback_manager is deprecated. Please use callbacks instead.\n", | |
| " llm = Ollama(\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdin", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Query: Describe everything you know about Boeing from American Airlines \n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Based on the provided context, here's what I can tell you about Boeing from American Airlines:\n", | |
| "\n", | |
| "* There are two aircraft types mentioned:\n", | |
| "\t+ B738 (Boeing 737-800)\n", | |
| "\t+ These two aircraft are both built by Boeing\n", | |
| "* They are operated by American Airlines (AAL) from the United States\n", | |
| "* The flight numbers of these aircraft are AAL541 and AAL1064Based on the provided context, here's what I can tell you about Boeing from American Airlines:\n", | |
| "\n", | |
| "* There are two aircraft types mentioned:\n", | |
| "\t+ B738 (Boeing 737-800)\n", | |
| "\t+ These two aircraft are both built by Boeing\n", | |
| "* They are operated by American Airlines (AAL) from the United States\n", | |
| "* The flight numbers of these aircraft are AAL541 and AAL1064\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "/var/folders/c2/6wq9klzn19d97gvw8f8j1jdr0000gn/T/ipykernel_82902/3689434376.py:2: DeprecationWarning: callback_manager is deprecated. Please use callbacks instead.\n", | |
| " llm = Ollama(\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdin", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Query: Describe everything you know about flight AAL1056\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Here's what I can gather from the provided context:\n", | |
| "\n", | |
| "**Flight Information**\n", | |
| "\n", | |
| "* Flight number: AAL1056\n", | |
| "* Aircraft type: B738 (Boeing 737-800)\n", | |
| "* Operator: American Airlines\n", | |
| "* Country of origin: United States\n", | |
| "\n", | |
| "**Location and Altitude**\n", | |
| "\n", | |
| "* Location: approximately at 40.620071 latitude and -74.554321 longitude\n", | |
| "* Altitude:\n", | |
| "\t+ Barometric altitude: 19,125 feet\n", | |
| "\t+ Geodetic altitude: 19,725 feet\n", | |
| "\n", | |
| "**Speed and Time**\n", | |
| "\n", | |
| "* Speed: 414.6 mph (mph = miles per hour)\n", | |
| "* Date and time: September 22, 2024, at 15:44:30\n", | |
| "\n", | |
| "I don't know any additional information beyond what's provided in this context.Here's what I can gather from the provided context:\n", | |
| "\n", | |
| "**Flight Information**\n", | |
| "\n", | |
| "* Flight number: AAL1056\n", | |
| "* Aircraft type: B738 (Boeing 737-800)\n", | |
| "* Operator: American Airlines\n", | |
| "* Country of origin: United States\n", | |
| "\n", | |
| "**Location and Altitude**\n", | |
| "\n", | |
| "* Location: approximately at 40.620071 latitude and -74.554321 longitude\n", | |
| "* Altitude:\n", | |
| "\t+ Barometric altitude: 19,125 feet\n", | |
| "\t+ Geodetic altitude: 19,725 feet\n", | |
| "\n", | |
| "**Speed and Time**\n", | |
| "\n", | |
| "* Speed: 414.6 mph (mph = miles per hour)\n", | |
| "* Date and time: September 22, 2024, at 15:44:30\n", | |
| "\n", | |
| "I don't know any additional information beyond what's provided in this context.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "/var/folders/c2/6wq9klzn19d97gvw8f8j1jdr0000gn/T/ipykernel_82902/3689434376.py:2: DeprecationWarning: callback_manager is deprecated. Please use callbacks instead.\n", | |
| " llm = Ollama(\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdin", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Query: Tell me everything about UAL2072\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Based on the provided context, here's everything I know about UAL2072:\n", | |
| "\n", | |
| "* Aircraft type: B753 (built by Boeing)\n", | |
| "* Operator: United Airlines (from the United States)\n", | |
| "* Flight number: UAL2072\n", | |
| "* Speed: 171.8 mph at 2024-09-22 15:44:38\n", | |
| "* Location: Latitude 40.317357, longitude -74.417641\n", | |
| "* Altitude:\n", | |
| "\t+ Barometric altitude: 2925 feet\n", | |
| "\t+ Geodetic altitude: 2950 feetBased on the provided context, here's everything I know about UAL2072:\n", | |
| "\n", | |
| "* Aircraft type: B753 (built by Boeing)\n", | |
| "* Operator: United Airlines (from the United States)\n", | |
| "* Flight number: UAL2072\n", | |
| "* Speed: 171.8 mph at 2024-09-22 15:44:38\n", | |
| "* Location: Latitude 40.317357, longitude -74.417641\n", | |
| "* Altitude:\n", | |
| "\t+ Barometric altitude: 2925 feet\n", | |
| "\t+ Geodetic altitude: 2950 feet\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "/var/folders/c2/6wq9klzn19d97gvw8f8j1jdr0000gn/T/ipykernel_82902/3689434376.py:2: DeprecationWarning: callback_manager is deprecated. Please use callbacks instead.\n", | |
| " llm = Ollama(\n" | |
| ] | |
| } | |
| ], | |
| "source": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "31fe5766-42d3-4111-8e2e-ccb8f9628983", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3 (ipykernel)", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.12.6" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment