test_config_llamacpp.ipynb
@fpaupier, created February 4, 2024 08:03
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"gpuType": "T4",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/fpaupier/d978e8809bc2b699df9ea3c12c433080/test_config_llamacpp.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "uK5S-e8pirtu",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "7eefec5c-1075-476c-cee1-baa22fc11ac2"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m806.7/806.7 kB\u001b[0m \u001b[31m16.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m73.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m237.0/237.0 kB\u001b[0m \u001b[31m27.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.4/54.4 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h"
]
}
],
"source": [
"%pip install --upgrade --quiet langchain"
]
},
{
"cell_type": "code",
"source": [
"!CMAKE_ARGS=\"-DLLAMA_CUBLAS=on\" FORCE_CMAKE=1 pip install llama-cpp-python"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "CJnq94b9nG_8",
"outputId": "d2573a05-3e67-4c1f-92b7-e19d105c62d9"
},
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Collecting llama-cpp-python\n",
" Downloading llama_cpp_python-0.2.38.tar.gz (10.7 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.7/10.7 MB\u001b[0m \u001b[31m84.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
" Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
" Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n",
" Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
"Requirement already satisfied: typing-extensions>=4.5.0 in /usr/local/lib/python3.10/dist-packages (from llama-cpp-python) (4.5.0)\n",
"Requirement already satisfied: numpy>=1.20.0 in /usr/local/lib/python3.10/dist-packages (from llama-cpp-python) (1.23.5)\n",
"Requirement already satisfied: diskcache>=5.6.1 in /usr/local/lib/python3.10/dist-packages (from llama-cpp-python) (5.6.3)\n",
"Requirement already satisfied: jinja2>=2.11.3 in /usr/local/lib/python3.10/dist-packages (from llama-cpp-python) (3.1.3)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2>=2.11.3->llama-cpp-python) (2.1.4)\n",
"Building wheels for collected packages: llama-cpp-python\n",
" Building wheel for llama-cpp-python (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for llama-cpp-python: filename=llama_cpp_python-0.2.38-cp310-cp310-manylinux_2_35_x86_64.whl size=9660777 sha256=d32b95c017da99ac20949bb4cfe1f81c28a740c455764d33b8e3b1630c64c747\n",
" Stored in directory: /root/.cache/pip/wheels/eb/58/77/20d3d9a235b4930050fbcde1ad4f0a4d054644269e801b08aa\n",
"Successfully built llama-cpp-python\n",
"Installing collected packages: llama-cpp-python\n",
"Successfully installed llama-cpp-python-0.2.38\n"
]
}
]
},
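{
"cell_type": "markdown",
"metadata": {},
"source": [
"`CMAKE_ARGS=\"-DLLAMA_CUBLAS=on\"` builds `llama-cpp-python` with CUDA (cuBLAS) support, which is what lets `n_gpu_layers` offload model layers to the T4 GPU later on (that flag name matches the 0.2.x releases used here; newer releases renamed it). A minimal sanity check on the resulting build, as a sketch not executed in this run:"
]
},
{
"cell_type": "code",
"source": [
"# Sanity-check sketch (not executed in the original run): confirm the\n",
"# version of the llama-cpp-python build that pip just compiled.\n",
"import llama_cpp\n",
"\n",
"print(llama_cpp.__version__)  # the install log above shows 0.2.38"
],
"metadata": {},
"execution_count": null,
"outputs": []
},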
{
"cell_type": "code",
"source": [
"!pip install huggingface-hub"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "HJOTFctcoW8D",
"outputId": "82ad2a6e-c0cb-45a5-e1f9-7841f74db4ff"
},
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (0.20.3)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub) (3.13.1)\n",
"Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub) (2023.6.0)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub) (2.31.0)\n",
"Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub) (4.66.1)\n",
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub) (6.0.1)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub) (4.5.0)\n",
"Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub) (23.2)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub) (3.3.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub) (3.6)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub) (2.0.7)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub) (2023.11.17)\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"from langchain.callbacks.manager import CallbackManager\n",
"from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
"from langchain_community.llms import LlamaCpp\n",
"from langchain_core.runnables import ConfigurableField"
],
"metadata": {
"id": "LuFYfU2Nl1vd"
},
"execution_count": 4,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Callbacks support token-wise streaming\n",
"callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])"
],
"metadata": {
"id": "1etqPY4Jl35Q"
},
"execution_count": 5,
"outputs": []
},
{
"cell_type": "code",
"source": [
"!huggingface-cli download TheBloke/zephyr-7B-beta-GGUF zephyr-7b-beta.Q4_K_M.gguf --local-dir . --local-dir-use-symlinks False"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ndcXhjogobxY",
"outputId": "100fe67c-026d-48df-bed6-6e1e6647eda5"
},
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Consider using `hf_transfer` for faster downloads. This solution comes with some limitations. See https://huggingface.co/docs/huggingface_hub/hf_transfer for more details.\n",
"downloading https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q4_K_M.gguf to /root/.cache/huggingface/hub/tmpnq17hkse\n",
"zephyr-7b-beta.Q4_K_M.gguf: 100% 4.37G/4.37G [00:19<00:00, 225MB/s]\n",
"./zephyr-7b-beta.Q4_K_M.gguf\n"
]
}
]
},
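{
"cell_type": "markdown",
"metadata": {},
"source": [
"The CLI download above can also be done from Python. Below is an equivalent sketch using `huggingface_hub.hf_hub_download` (not executed in this run):"
]
},
{
"cell_type": "code",
"source": [
"# Sketch: the same download as the huggingface-cli call above, via the\n",
"# huggingface_hub Python API (not executed in this run).\n",
"from huggingface_hub import hf_hub_download\n",
"\n",
"model_path = hf_hub_download(\n",
"    repo_id=\"TheBloke/zephyr-7B-beta-GGUF\",\n",
"    filename=\"zephyr-7b-beta.Q4_K_M.gguf\",\n",
"    local_dir=\".\",\n",
")\n",
"print(model_path)  # expected: ./zephyr-7b-beta.Q4_K_M.gguf"
],
"metadata": {},
"execution_count": null,
"outputs": []
},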
{
"cell_type": "code",
"source": [
"n_gpu_layers = -1 # The number of layers to put on the GPU. The rest will be on the CPU. If you don't know how many layers there are, you can use -1 to move all to GPU.\n",
"n_batch = 512 # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.\n",
"\n",
"config: dict = {\n",
" 'model_path': \"./zephyr-7b-beta.Q4_K_M.gguf\",\n",
" 'n_gpu_layers': n_gpu_layers,\n",
" 'n_batch': n_batch,\n",
" 'echo': True,\n",
" 'callback_manager': callback_manager,\n",
" 'verbose': True, # Verbose is required to pass to the callback manager\n",
" \"max_tokens\": 50,\n",
" \"temperature\": 0.1\n",
"}\n",
"\n",
"\n",
"# Make sure the model path is correct for your system!\n",
"llm_a = LlamaCpp(**config).configurable_fields(\n",
" temperature=ConfigurableField(\n",
" id=\"llm_temperature\",\n",
" name=\"LLM Temperature\",\n",
" description=\"The temperature of the LLM\",\n",
" ),\n",
" max_tokens=ConfigurableField(\n",
" id=\"llm_max_tokens\",\n",
" name=\"LLM max output tokens\",\n",
" description=\"The maximum number of tokens to generate\",\n",
" ),\n",
" top_p=ConfigurableField(\n",
" id=\"llm_top_p\",\n",
" name=\"LLM top p\",\n",
" description=\"The top-p value to use for sampling\",\n",
" ),\n",
" top_k=ConfigurableField(\n",
" id=\"llm_top_k\",\n",
" name=\"LLM top-k\",\n",
" description=\"The top-k value to use for sampling\",\n",
" )\n",
")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "mpKcqKIqnY0A",
"outputId": "d5a2c0e8-208e-47bb-9ec6-cd3f7cd39684"
},
"execution_count": 7,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"AVX = 1 | AVX_VNNI = 0 | AVX2 = 1 | AVX512 = 1 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | \n",
"Model metadata: {'tokenizer.ggml.padding_token_id': '2', 'tokenizer.ggml.unknown_token_id': '0', 'tokenizer.ggml.eos_token_id': '2', 'general.architecture': 'llama', 'llama.rope.freq_base': '10000.000000', 'llama.context_length': '32768', 'general.name': 'huggingfaceh4_zephyr-7b-beta', 'llama.embedding_length': '4096', 'llama.feed_forward_length': '14336', 'llama.attention.layer_norm_rms_epsilon': '0.000010', 'llama.rope.dimension_count': '128', 'tokenizer.ggml.bos_token_id': '1', 'llama.attention.head_count': '32', 'llama.block_count': '32', 'llama.attention.head_count_kv': '8', 'general.quantization_version': '2', 'tokenizer.ggml.model': 'llama', 'general.file_type': '15'}\n"
]
}
]
},
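{
"cell_type": "markdown",
"metadata": {},
"source": [
"`configurable_fields` wraps the base `LlamaCpp` in a runnable whose listed fields can be overridden per call via `with_config`. Each runnable exposes the registered fields through its `config_specs` property; a short inspection sketch (not executed in this run):"
]
},
{
"cell_type": "code",
"source": [
"# Sketch (not executed in this run): list the fields made configurable above.\n",
"# config_specs is a public property on LangChain runnables.\n",
"for spec in llm_a.config_specs:\n",
"    print(spec.id, \"->\", spec.name)"
],
"metadata": {},
"execution_count": null,
"outputs": []
},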
{
"cell_type": "code",
"source": [
"llm_a.invoke(\"tell a funny joke\")"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 129
},
"id": "oWxhCoMOYi7i",
"outputId": "8898b429-e399-49a9-eb3c-03e75b7136fb"
},
"execution_count": 8,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" about a person who is afraid of water\n",
"\n",
"I once knew a guy who was so scared of water that he would carry an umbrella with him everywhere, just in case it started raining. But the real kicker was when I found out that he also had a fear of coffee cups, because they sometimes have water in the bottom!"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"' about a person who is afraid of water\\n\\nI once knew a guy who was so scared of water that he would carry an umbrella with him everywhere, just in case it started raining. But the real kicker was when I found out that he also had a fear of coffee cups, because they sometimes have water in the bottom!'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 8
}
]
},
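{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note how the completion continues the prompt (\"tell a funny joke *about a person who is afraid of water*\"): `LlamaCpp` is a plain completion interface, so Zephyr's chat template is not applied to the string. A sketch of a Zephyr-formatted prompt (not executed in this run; the system message is illustrative):"
]
},
{
"cell_type": "code",
"source": [
"# Sketch (not executed in this run): Zephyr's documented chat format,\n",
"# which the plain string prompt above does not apply.\n",
"prompt = (\n",
"    \"<|system|>\\nYou are a comedian.</s>\\n\"\n",
"    \"<|user|>\\ntell a funny joke</s>\\n\"\n",
"    \"<|assistant|>\\n\"\n",
")\n",
"llm_a.invoke(prompt)"
],
"metadata": {},
"execution_count": null,
"outputs": []
},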
{
"cell_type": "code",
"source": [
"llm_a.with_config(configurable={\n",
" \"llm_temperature\": 0.9,\n",
" \"llm_top_p\": 0.9,\n",
" \"llm_top_k\": 0.2,\n",
" \"llm_max_tokens\": 15,\n",
"}).invoke(\"pick a random number\")"
],
"metadata": {
"id": "lDU3hOtonblH",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 278
},
"outputId": "1e9c1f8e-e82b-4cdf-99ef-099391118dd3"
},
"execution_count": 10,
"outputs": [
{
"output_type": "error",
"ename": "ConfigError",
"evalue": "field \"grammar\" not yet prepared so type is still a ForwardRef, you might need to call LlamaCpp.update_forward_refs().",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mConfigError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-10-16b4c6671e0e>\u001b[0m in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\"llm_top_k\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;36m0.2\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\"llm_max_tokens\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;36m15\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m }).invoke(\"pick a random number\")\n\u001b[0m",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/langchain_core/runnables/base.py\u001b[0m in \u001b[0;36minvoke\u001b[0;34m(self, input, config, **kwargs)\u001b[0m\n\u001b[1;32m 4039\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mAny\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4040\u001b[0m ) -> Output:\n\u001b[0;32m-> 4041\u001b[0;31m return self.bound.invoke(\n\u001b[0m\u001b[1;32m 4042\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4043\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_merge_configs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/langchain_core/runnables/configurable.py\u001b[0m in \u001b[0;36minvoke\u001b[0;34m(self, input, config, **kwargs)\u001b[0m\n\u001b[1;32m 92\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minput\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mInput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconfig\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mRunnableConfig\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mAny\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 93\u001b[0m ) -> Output:\n\u001b[0;32m---> 94\u001b[0;31m \u001b[0mrunnable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconfig\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prepare\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 95\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mrunnable\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minvoke\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconfig\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 96\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/langchain_core/runnables/configurable.py\u001b[0m in \u001b[0;36m_prepare\u001b[0;34m(self, config)\u001b[0m\n\u001b[1;32m 289\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mconfigurable\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 290\u001b[0m return (\n\u001b[0;32m--> 291\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdefault\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__class__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdefault\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__dict__\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mconfigurable\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 292\u001b[0m \u001b[0mconfig\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 293\u001b[0m )\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/langchain_core/load/serializable.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 105\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 106\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mAny\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 107\u001b[0;31m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 108\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lc_kwargs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 109\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pydantic/main.cpython-310-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mpydantic.main.BaseModel.__init__\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pydantic/main.cpython-310-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mpydantic.main.validate_model\u001b[0;34m()\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pydantic/fields.cpython-310-x86_64-linux-gnu.so\u001b[0m in \u001b[0;36mpydantic.fields.ModelField.validate\u001b[0;34m()\u001b[0m\n",
"\u001b[0;31mConfigError\u001b[0m: field \"grammar\" not yet prepared so type is still a ForwardRef, you might need to call LlamaCpp.update_forward_refs()."
]
}
]
},
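{
"cell_type": "markdown",
"metadata": {},
"source": [
"The `ConfigError` is raised while `with_config` rebuilds the `LlamaCpp` model: its pydantic-v1 `grammar` field is still an unresolved `ForwardRef`. The error message itself suggests calling `LlamaCpp.update_forward_refs()`. An untested workaround sketch follows; note also that `top_k` is an integer count of candidate tokens, so the `0.2` passed above is likely not the intended value."
]
},
{
"cell_type": "code",
"source": [
"# Untested workaround sketch, following the ConfigError's own suggestion:\n",
"# resolve the pydantic ForwardRef behind LlamaCpp's grammar field, then retry.\n",
"from llama_cpp import LlamaGrammar  # the type the \"grammar\" ForwardRef points to\n",
"\n",
"LlamaCpp.update_forward_refs(LlamaGrammar=LlamaGrammar)\n",
"llm_a.with_config(configurable={\n",
"    \"llm_temperature\": 0.9,\n",
"    \"llm_top_p\": 0.9,\n",
"    \"llm_top_k\": 40,  # top-k should be an integer\n",
"    \"llm_max_tokens\": 15,\n",
"}).invoke(\"pick a random number\")"
],
"metadata": {},
"execution_count": null,
"outputs": []
}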
]
}