{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU",
"gpuClass": "standard",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"d5e90ca28fa84e449654b3410fa1d9b1": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_429ac883ced94e6a8523c88425a8375a",
"IPY_MODEL_76ddb3d32b5847a18c45d8ddcfab1a41",
"IPY_MODEL_515786e0edf2437c802f0d15a32ef9fc"
],
"layout": "IPY_MODEL_30bfe0086adc4c65b559628053efd235"
}
},
"429ac883ced94e6a8523c88425a8375a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_a4f8604338bb46818873e0268e24e3a7",
"placeholder": "​",
"style": "IPY_MODEL_d501bf48c1304e8d87b34cb871e29142",
"value": "Downloading pytorch_model.bin: 100%"
}
},
"76ddb3d32b5847a18c45d8ddcfab1a41": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_9279389623ca4f2eaf6efd70df01ba8f",
"max": 3893083075,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_9e087b2067e548bda20691817c3f5189",
"value": 3893083075
}
},
"515786e0edf2437c802f0d15a32ef9fc": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_0ff9a42d48334182b9b59a304c29b702",
"placeholder": "​",
"style": "IPY_MODEL_920ce7efef9d4770b566010b556c367f",
"value": " 3.89G/3.89G [01:25<00:00, 60.6MB/s]"
}
},
"30bfe0086adc4c65b559628053efd235": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"a4f8604338bb46818873e0268e24e3a7": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"d501bf48c1304e8d87b34cb871e29142": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"9279389623ca4f2eaf6efd70df01ba8f": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"9e087b2067e548bda20691817c3f5189": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"0ff9a42d48334182b9b59a304c29b702": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"920ce7efef9d4770b566010b556c367f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"## 安装依赖"
],
"metadata": {
"id": "MZyEDzEHyDcN"
}
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "eNbQgYuLxp-3",
"outputId": "179d7305-a7d0-4da2-a2f0-ef8eeabe5d2a"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: protobuf==3.20.0 in /usr/local/lib/python3.9/dist-packages (3.20.0)\n",
"Requirement already satisfied: transformers==4.27.1 in /usr/local/lib/python3.9/dist-packages (4.27.1)\n",
"Collecting icetk\n",
" Using cached icetk-0.0.7-py3-none-any.whl (16 kB)\n",
"Requirement already satisfied: cpm_kernels in /usr/local/lib/python3.9/dist-packages (1.0.11)\n",
"Requirement already satisfied: torch in /usr/local/lib/python3.9/dist-packages (2.0.0+cu118)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.11.0 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (0.13.4)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (23.1)\n",
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (6.0)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (2022.10.31)\n",
"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (4.65.0)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (3.11.0)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (2.27.1)\n",
"Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (0.13.3)\n",
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (1.22.4)\n",
"Requirement already satisfied: torchvision in /usr/local/lib/python3.9/dist-packages (from icetk) (0.15.1+cu118)\n",
"Requirement already satisfied: sentencepiece in /usr/local/lib/python3.9/dist-packages (from icetk) (0.1.98)\n",
" Using cached icetk-0.0.6-py3-none-any.whl (15 kB)\n",
" Using cached icetk-0.0.5-py3-none-any.whl (15 kB)\n",
" Using cached icetk-0.0.4-py3-none-any.whl (15 kB)\n",
"Requirement already satisfied: sympy in /usr/local/lib/python3.9/dist-packages (from torch) (1.11.1)\n",
"Requirement already satisfied: networkx in /usr/local/lib/python3.9/dist-packages (from torch) (3.1)\n",
"Requirement already satisfied: typing-extensions in /usr/local/lib/python3.9/dist-packages (from torch) (4.5.0)\n",
"Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.9/dist-packages (from torch) (2.0.0)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.9/dist-packages (from torch) (3.1.2)\n",
"Requirement already satisfied: cmake in /usr/local/lib/python3.9/dist-packages (from triton==2.0.0->torch) (3.25.2)\n",
"Requirement already satisfied: lit in /usr/local/lib/python3.9/dist-packages (from triton==2.0.0->torch) (16.0.1)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.9/dist-packages (from jinja2->torch) (2.1.2)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.9/dist-packages (from requests->transformers==4.27.1) (2022.12.7)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests->transformers==4.27.1) (3.4)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests->transformers==4.27.1) (1.26.15)\n",
"Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.9/dist-packages (from requests->transformers==4.27.1) (2.0.12)\n",
"Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.9/dist-packages (from sympy->torch) (1.3.0)\n",
"Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.9/dist-packages (from torchvision->icetk) (8.4.0)\n",
"Installing collected packages: icetk\n",
"Successfully installed icetk-0.0.4\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Collecting fastapi\n",
" Using cached fastapi-0.95.1-py3-none-any.whl (56 kB)\n",
"Requirement already satisfied: pydantic in /usr/local/lib/python3.9/dist-packages (1.10.7)\n",
"Collecting uvicorn\n",
" Using cached uvicorn-0.21.1-py3-none-any.whl (57 kB)\n",
"Collecting sse_starlette\n",
" Using cached sse_starlette-1.3.4-py3-none-any.whl (8.9 kB)\n",
"Collecting pyngrok\n",
" Using cached pyngrok-6.0.0.tar.gz (681 kB)\n",
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
"Requirement already satisfied: nest-asyncio in /usr/local/lib/python3.9/dist-packages (1.5.6)\n",
"Collecting starlette<0.27.0,>=0.26.1\n",
" Using cached starlette-0.26.1-py3-none-any.whl (66 kB)\n",
"Requirement already satisfied: typing-extensions>=4.2.0 in /usr/local/lib/python3.9/dist-packages (from pydantic) (4.5.0)\n",
"Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.9/dist-packages (from uvicorn) (8.1.3)\n",
"Collecting h11>=0.8\n",
" Using cached h11-0.14.0-py3-none-any.whl (58 kB)\n",
"Requirement already satisfied: PyYAML in /usr/local/lib/python3.9/dist-packages (from pyngrok) (6.0)\n",
"Requirement already satisfied: anyio<5,>=3.4.0 in /usr/local/lib/python3.9/dist-packages (from starlette<0.27.0,>=0.26.1->fastapi) (3.6.2)\n",
"Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.9/dist-packages (from anyio<5,>=3.4.0->starlette<0.27.0,>=0.26.1->fastapi) (1.3.0)\n",
"Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.9/dist-packages (from anyio<5,>=3.4.0->starlette<0.27.0,>=0.26.1->fastapi) (3.4)\n",
"Building wheels for collected packages: pyngrok\n",
" Building wheel for pyngrok (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for pyngrok: filename=pyngrok-6.0.0-py3-none-any.whl size=19879 sha256=4158360e5a2c7bc1519a6b2a9b98e8595d14000bc0d5a60320ce0ea422ad04d4\n",
" Stored in directory: /root/.cache/pip/wheels/31/49/9c/44b13823eb256a3b4dff34b972f7a3c7d9910bfef269e59bd7\n",
"Successfully built pyngrok\n",
"Installing collected packages: pyngrok, h11, uvicorn, starlette, sse_starlette, fastapi\n",
"Successfully installed fastapi-0.95.1 h11-0.14.0 pyngrok-6.0.0 sse_starlette-1.3.4 starlette-0.26.1 uvicorn-0.21.1\n"
]
}
],
"source": [
"import locale\n",
"locale.getpreferredencoding = lambda: \"UTF-8\"\n",
"\n",
"!pip install protobuf==3.20.0 transformers==4.27.1 icetk cpm_kernels torch\n",
"!pip install fastapi pydantic uvicorn sse_starlette pyngrok nest-asyncio"
]
},
{
"cell_type": "markdown",
"source": [
"## 环境配置"
],
"metadata": {
"id": "VaY7_gNOzdoG"
}
},
{
"cell_type": "code",
"source": [
"chatglm_models = [\n",
" \"THUDM/chatglm-6b\", # 原始模型\n",
" \"THUDM/chatglm-6b-int8\", # int8 量化\n",
" \"THUDM/chatglm-6b-int4\", # int4 量化\n",
"]\n",
"\n",
"CHATGLM_MODEL = \"THUDM/chatglm-6b-int4\"\n",
"\n",
"# GPU/CPU\n",
"RUNNING_DEVICE = \"GPU\"\n",
"\n",
"# API_TOKEN\n",
"TOKEN = \"token1\"\n"
],
"metadata": {
"id": "JJOlpnVOyIeG"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## 启动模型"
],
"metadata": {
"id": "CFzHRCMYz08N"
}
},
{
"cell_type": "code",
"source": [
"from transformers import AutoModel, AutoTokenizer\n",
"\n",
"def init_chatglm(model_name: str, running_device: str):\n",
" tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n",
" model = AutoModel.from_pretrained(model_name, trust_remote_code=True)\n",
"\n",
" if running_device == \"GPU\":\n",
" model = model.half().cuda()\n",
" else:\n",
" model = model.float()\n",
" model.eval()\n",
" return tokenizer, model\n",
"\n",
"tokenizer, model = init_chatglm(CHATGLM_MODEL, RUNNING_DEVICE)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 258,
"referenced_widgets": [
"d5e90ca28fa84e449654b3410fa1d9b1",
"429ac883ced94e6a8523c88425a8375a",
"76ddb3d32b5847a18c45d8ddcfab1a41",
"515786e0edf2437c802f0d15a32ef9fc",
"30bfe0086adc4c65b559628053efd235",
"a4f8604338bb46818873e0268e24e3a7",
"d501bf48c1304e8d87b34cb871e29142",
"9279389623ca4f2eaf6efd70df01ba8f",
"9e087b2067e548bda20691817c3f5189",
"0ff9a42d48334182b9b59a304c29b702",
"920ce7efef9d4770b566010b556c367f"
]
},
"id": "VIT1nTv_1a4X",
"outputId": "1f332557-080b-4bac-ac10-720208568a1e"
},
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.\n",
"Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.\n",
"Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading pytorch_model.bin: 0%| | 0.00/3.89G [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "d5e90ca28fa84e449654b3410fa1d9b1"
}
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"No compiled kernel found.\n",
"Compiling kernels : /root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/e02ba894cf18f3fd9b2526c795f983683c4ec732/quantization_kernels.c\n",
"Compiling gcc -O3 -fPIC -std=c99 /root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/e02ba894cf18f3fd9b2526c795f983683c4ec732/quantization_kernels.c -shared -o /root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/e02ba894cf18f3fd9b2526c795f983683c4ec732/quantization_kernels.so\n",
"Load kernel : /root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/e02ba894cf18f3fd9b2526c795f983683c4ec732/quantization_kernels.so\n",
"Using quantization cache\n",
"Applying quantization to glm layers\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## 测试模型"
],
"metadata": {
"id": "b5RuRcsD3hPw"
}
},
{
"cell_type": "code",
"source": [
"response, history = model.chat(tokenizer, \"你好\", history=[])\n",
"print(response)\n",
"print(history)\n",
"response, history = model.chat(tokenizer, \"很高兴认识你\", history=history)\n",
"print(response)\n",
"print(history)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ZFY3ju-N3gk1",
"outputId": "889514c3-f5e7-4ae6-d0ba-99ac3c9d1df0"
},
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"WARNING:transformers_modules.THUDM.chatglm-6b-int4.e02ba894cf18f3fd9b2526c795f983683c4ec732.modeling_chatglm:The dtype of attention mask (torch.int64) is not bool\n"
]
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"你好👋!我是人工智能助手 ChatGLM-6B,很高兴见到你,欢迎问我任何问题。\n",
"[('你好', '你好👋!我是人工智能助手 ChatGLM-6B,很高兴见到你,欢迎问我任何问题。')]\n",
"谢谢你的夸奖,我很高兴能够和你交流。如果你有任何问题或需要帮助,请随时告诉我,我会尽力回答和帮助。\n",
"[('你好', '你好👋!我是人工智能助手 ChatGLM-6B,很高兴见到你,欢迎问我任何问题。'), ('很高兴认识你', '谢谢你的夸奖,我很高兴能够和你交流。如果你有任何问题或需要帮助,请随时告诉我,我会尽力回答和帮助。')]\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## 启动服务器(with tunnel)"
],
"metadata": {
"id": "LnexGhqK39NJ"
}
},
{
"cell_type": "code",
"source": [
"import torch\n",
"from fastapi import FastAPI, Request, status, HTTPException\n",
"from fastapi.responses import JSONResponse\n",
"from pydantic import BaseModel\n",
"from sse_starlette.sse import EventSourceResponse\n",
"from fastapi.middleware.cors import CORSMiddleware\n",
"import uvicorn\n",
"import json\n",
"from typing import List, Optional\n",
"\n",
"\n",
"# 参考 https://github.com/josStorer/selfhostedAI/blob/master/main.py\n",
"\n",
"def torch_gc():\n",
" if torch.cuda.is_available():\n",
" with torch.cuda.device(0):\n",
" torch.cuda.empty_cache()\n",
" torch.cuda.ipc_collect()\n",
"\n",
"\n",
"app = FastAPI()\n",
"\n",
"app.add_middleware(\n",
" CORSMiddleware,\n",
" allow_origins=['*'],\n",
" allow_credentials=True,\n",
" allow_methods=['*'],\n",
" allow_headers=['*'],\n",
")\n",
"\n",
"\n",
"class Message(BaseModel):\n",
" role: str\n",
" content: str\n",
"\n",
"\n",
"class Body(BaseModel):\n",
" messages: List[Message]\n",
" model: str\n",
" stream: Optional[bool] = False\n",
" max_tokens: Optional[int] = 256\n",
" temperature: Optional[float] = 0.95\n",
" top_p: Optional[float] = 0.7\n",
"\n",
"\n",
"\n",
"@app.get(\"/\")\n",
"def read_root():\n",
" return {\"Hello\": \"World!\"}\n",
"\n",
"@app.get(\"/v1/models\")\n",
"def get_models():\n",
" return {\"data\": [\n",
" {\n",
" \"created\": 1677610602,\n",
" \"id\": \"gpt-3.5-turbo\",\n",
" \"object\": \"model\",\n",
" \"owned_by\": \"openai\",\n",
" \"permission\": [\n",
" {\n",
" \"created\": 1680818747,\n",
" \"id\": \"modelperm-fTUZTbzFp7uLLTeMSo9ks6oT\",\n",
" \"object\": \"model_permission\",\n",
" \"allow_create_engine\": False,\n",
" \"allow_sampling\": True,\n",
" \"allow_logprobs\": True,\n",
" \"allow_search_indices\": False,\n",
" \"allow_view\": True,\n",
" \"allow_fine_tuning\": False,\n",
" \"organization\": \"*\",\n",
" \"group\": None,\n",
" \"is_blocking\": False\n",
" }\n",
" ],\n",
" \"root\": \"gpt-3.5-turbo\",\n",
" \"parent\": None,\n",
" },\n",
" ],\n",
" \"object\": \"list\"\n",
" }\n",
"\n",
"def generate_response(content: str):\n",
" return {\n",
" \"id\": \"chatcmpl-77PZm95TtxE0oYLRx3cxa6HtIDI7s\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1682000966,\n",
" \"model\": \"gpt-3.5-turbo-0301\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 10,\n",
" \"completion_tokens\": 10,\n",
" \"total_tokens\": 20,\n",
" },\n",
" \"choices\": [{\n",
" \"message\": {\"role\": \"assistant\", \"content\": content}, \"finish_reason\": \"stop\", \"index\": 0}\n",
" ]\n",
" }\n",
"\n",
"def generate_stream_response_start():\n",
" return {\"id\":\"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\",\"object\":\"chat.completion.chunk\",\"created\":1682004627,\"model\":\"gpt-3.5-turbo-0301\",\"choices\":[{\"delta\":{\"role\":\"assistant\"},\"index\":0,\"finish_reason\":None}]}\n",
"\n",
"def generate_stream_response(content: str):\n",
" return {\n",
" \"id\":\"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\",\n",
" \"object\":\"chat.completion.chunk\",\n",
" \"created\":1682004627,\n",
" \"model\":\"gpt-3.5-turbo-0301\",\n",
" \"choices\":[{\"delta\":{\"content\":content},\"index\":0,\"finish_reason\":None}\n",
" ]}\n",
"\n",
"def generate_stream_response_stop():\n",
" return {\"id\":\"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\",\"object\":\"chat.completion.chunk\",\"created\":1682004627,\"model\":\"gpt-3.5-turbo-0301\",\"choices\":[{\"delta\":{},\"index\":0,\"finish_reason\":\"stop\"}]}\n",
"\n",
"@app.post(\"/v1/chat/completions\")\n",
"async def completions(body: Body, request: Request):\n",
"    auth_header = request.headers.get(\"Authorization\", \"\")\n",
"    if not auth_header.startswith(\"Bearer \") or auth_header.split(\" \", 1)[1] != TOKEN:\n",
" raise HTTPException(status.HTTP_401_UNAUTHORIZED, \"Token is wrong!\")\n",
" \n",
" torch_gc()\n",
"\n",
" question = body.messages[-1]\n",
" if question.role == 'user':\n",
" question = question.content\n",
" else:\n",
" raise HTTPException(status.HTTP_400_BAD_REQUEST, \"No Question Found\")\n",
"\n",
" history = []\n",
" user_question = ''\n",
" for message in body.messages:\n",
" if message.role == 'system':\n",
" history.append((message.content, \"OK\"))\n",
" if message.role == 'user':\n",
" user_question = message.content\n",
" elif message.role == 'assistant':\n",
" assistant_answer = message.content\n",
" history.append((user_question, assistant_answer))\n",
"\n",
" print(f\"question = {question}, history = {history}\")\n",
"\n",
" \n",
" if body.stream:\n",
" async def eval_chatglm():\n",
" sends = 0\n",
" first = True\n",
" for response, _ in model.stream_chat(\n",
" tokenizer, question, history,\n",
" temperature=body.temperature,\n",
" top_p=body.top_p,\n",
" max_length=max(2048, body.max_tokens)):\n",
" if await request.is_disconnected():\n",
" return\n",
" ret = response[sends:]\n",
" sends = len(response)\n",
" if first:\n",
" first = False\n",
" yield json.dumps(generate_stream_response_start(), ensure_ascii=False)\n",
" yield json.dumps(generate_stream_response(ret), ensure_ascii=False)\n",
" yield json.dumps(generate_stream_response_stop(), ensure_ascii=False)\n",
" yield \"[DONE]\"\n",
" return EventSourceResponse(eval_chatglm(), ping=10000)\n",
" else:\n",
" response, _ = model.chat(\n",
" tokenizer, question, history,\n",
" temperature=body.temperature,\n",
" top_p=body.top_p,\n",
" max_length=max(2048, body.max_tokens))\n",
" print(f\"response: {response}\")\n",
" return JSONResponse(content=generate_response(response))"
],
"metadata": {
"id": "tX5oiLQJ4BTX"
},
"execution_count": 5,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# 在 Notebook 中运行所需\n",
"import nest_asyncio\n",
"nest_asyncio.apply()\n",
"\n",
"from pyngrok import ngrok, conf\n",
"\n",
"# ngrok.set_auth_token(os.environ[\"ngrok_token\"])\n",
"http_tunnel = ngrok.connect(8000)\n",
"print(http_tunnel.public_url)\n",
"\n",
"uvicorn.run(app, port=8000)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "6bPIXXdn8dG0",
"outputId": "78e1223c-b076-4183-e161-39bb1fa73d5f"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": []
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"WARNING:pyngrok.process.ngrok:t=2023-04-20T16:22:00+0000 lvl=warn msg=\"ngrok config file found at legacy location, move to XDG location\" xdg_path=/root/.config/ngrok/ngrok.yml legacy_path=/root/.ngrok2/ngrok.yml\n",
"Exception in thread Thread-11:\n",
"Traceback (most recent call last):\n",
" File \"/usr/lib/python3.9/threading.py\", line 980, in _bootstrap_inner\n",
" self.run()\n",
" File \"/usr/lib/python3.9/threading.py\", line 917, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/usr/local/lib/python3.9/dist-packages/pyngrok/process.py\", line 146, in _monitor_process\n",
" self._log_line(self.proc.stdout.readline())\n",
" File \"/usr/lib/python3.9/encodings/ascii.py\", line 26, in decode\n",
" return codecs.ascii_decode(input, self.errors)[0]\n",
"UnicodeDecodeError: 'ascii' codec can't decode byte 0xc2 in position 184: ordinal not in range(128)\n",
"INFO: Started server process [743]\n",
"INFO: Waiting for application startup.\n",
"INFO: Application startup complete.\n",
"INFO: Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)\n"
]
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"https://ca63-35-202-217-147.ngrok.io\n",
"INFO: 2400:56a0:1b2:1eab:7fc8:dc38:f0d7:a0db:0 - \"GET /v1/models HTTP/1.1\" 200 OK\n",
"question = 用Python写一个访问Twitter最新推文的脚本, history = []\n",
"INFO: 2400:56a0:1b2:1eab:7fc8:dc38:f0d7:a0db:0 - \"POST /v1/chat/completions HTTP/1.1\" 200 OK\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## 访问\n",
"\n",
"### 非流式接口\n",
"\n",
"```bash\n",
"curl -vvv https://6d8f-130-211-208-193.ngrok.io/v1/chat/completions \\\n",
" -H \"Content-Type: application/json\" \\\n",
" -H \"Authorization: Bearer token1\" \\\n",
" -d '{ \"stream\": false,\n",
" \"model\": \"gpt-3.5-turbo\",\n",
" \"messages\": [{\"role\": \"user\", \"content\": \"写一首夏天的诗\"}]\n",
" }'\n",
"```\n",
"\n",
"输出:\n",
"```json\n",
"{\"id\":\"chatcmpl-77PZm95TtxE0oYLRx3cxa6HtIDI7s\",\"object\":\"chat.completion\",\"created\":1682000966,\"model\":\"gpt-3.5-turbo-0301\",\"usage\":{\"prompt_tokens\":10,\"completion_tokens\":10,\"total_tokens\":20},\"choices\":[{\"message\":{\"role\":\"assistant\",\"content\":\"夏日的阳光下,\\n树叶闪烁着翠绿的光芒,\\n蝉鸣声不断响起,\\n伴着鸟儿的欢快歌唱。\\n\\n人们穿着轻便的衣服,\\n享受这清凉的夏日时光,\\n在海滩上晒着太阳,\\n喝着清凉的饮料,\\n聊天、欢笑、无忧无虑。\\n\\n清晨的日出,\\n天边呈现出美丽的红色,\\n太阳慢慢地升起,\\n照耀着整个天空。\\n\\n在公园里漫步,\\n欣赏着花草树木的美丽,\\n夏日的天空和大地,\\n让人感到无限的快乐。\\n\\n夏日的风吹过,\\n带来了凉爽的感觉,\\n让人感受到生命的美好,\\n让人感受到夏日的热情。\\n\\n这是一个美好的季节,\\n一个充满欢乐和热情的夏日,\\n让我们珍惜这美好的时光,\\n享受这夏日带来的快乐。\"},\"finish_reason\":\"stop\",\"index\":0}]}\n",
"```\n",
"\n",
"### 流式接口\n",
"\n",
"```bash\n",
"curl -vvv https://6d8f-130-211-208-193.ngrok.io/v1/chat/completions \\\n",
" -H \"Content-Type: application/json\" \\\n",
" -H \"Authorization: Bearer token1\" \\\n",
" -d '{ \"stream\": true, \n",
" \"model\": \"gpt-3.5-turbo\",\n",
" \"messages\": [{\"role\": \"user\", \"content\": \"写一首夏天的诗\"}]\n",
" }'\n",
"```\n",
"\n",
"输出:\n",
"```json\n",
"\n",
"data: {\"id\": \"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\", \"object\": \"chat.completion.chunk\", \"created\": 1682004627, \"model\": \"gpt-3.5-turbo-0301\", \"choices\": [{\"delta\": {\"content\": \"难忘的\"}, \"index\": 0, \"finish_reason\": null}]}\n",
"\n",
"data: {\"id\": \"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\", \"object\": \"chat.completion.chunk\", \"created\": 1682004627, \"model\": \"gpt-3.5-turbo-0301\", \"choices\": [{\"delta\": {\"content\": \"夏日\"}, \"index\": 0, \"finish_reason\": null}]}\n",
"\n",
"data: {\"id\": \"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\", \"object\": \"chat.completion.chunk\", \"created\": 1682004627, \"model\": \"gpt-3.5-turbo-0301\", \"choices\": [{\"delta\": {\"content\": \"时光\"}, \"index\": 0, \"finish_reason\": null}]}\n",
"\n",
"data: {\"id\": \"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\", \"object\": \"chat.completion.chunk\", \"created\": 1682004627, \"model\": \"gpt-3.5-turbo-0301\", \"choices\": [{\"delta\": {\"content\": \"。\"}, \"index\": 0, \"finish_reason\": null}]}\n",
"\n",
"data: {\"id\": \"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\", \"object\": \"chat.completion.chunk\", \"created\": 1682004627, \"model\": \"gpt-3.5-turbo-0301\", \"choices\": [{\"delta\": {}, \"index\": 0, \"finish_reason\": \"stop\"}]}\n",
"\n",
"data: [DONE]\n",
"```\n"
],
"metadata": {
"id": "xd6IIdSaIUhr"
}
},
{
"cell_type": "code",
"source": [
"# tips: colab 上 uvicorn的流式输出只有第一次运行notebook的时候才会有效,所以调试的时候可以用 exit()来强制重启 notebook (不删除运行时,从而避免重新下载模型文件)\n",
"\n",
"exit()"
],
"metadata": {
"id": "NJbR4SKIT4Hc"
},
"execution_count": null,
"outputs": []
}
]
}