{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU",
"gpuClass": "standard",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"d5e90ca28fa84e449654b3410fa1d9b1": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_429ac883ced94e6a8523c88425a8375a",
"IPY_MODEL_76ddb3d32b5847a18c45d8ddcfab1a41",
"IPY_MODEL_515786e0edf2437c802f0d15a32ef9fc"
],
"layout": "IPY_MODEL_30bfe0086adc4c65b559628053efd235"
}
},
"429ac883ced94e6a8523c88425a8375a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_a4f8604338bb46818873e0268e24e3a7",
"placeholder": "​",
"style": "IPY_MODEL_d501bf48c1304e8d87b34cb871e29142",
"value": "Downloading pytorch_model.bin: 100%"
}
},
"76ddb3d32b5847a18c45d8ddcfab1a41": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_9279389623ca4f2eaf6efd70df01ba8f",
"max": 3893083075,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_9e087b2067e548bda20691817c3f5189",
"value": 3893083075
}
},
"515786e0edf2437c802f0d15a32ef9fc": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_0ff9a42d48334182b9b59a304c29b702",
"placeholder": "​",
"style": "IPY_MODEL_920ce7efef9d4770b566010b556c367f",
"value": " 3.89G/3.89G [01:25<00:00, 60.6MB/s]"
}
},
"30bfe0086adc4c65b559628053efd235": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"a4f8604338bb46818873e0268e24e3a7": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"d501bf48c1304e8d87b34cb871e29142": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"9279389623ca4f2eaf6efd70df01ba8f": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"9e087b2067e548bda20691817c3f5189": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"0ff9a42d48334182b9b59a304c29b702": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"920ce7efef9d4770b566010b556c367f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"## 安装依赖"
],
"metadata": {
"id": "MZyEDzEHyDcN"
}
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "eNbQgYuLxp-3",
"outputId": "179d7305-a7d0-4da2-a2f0-ef8eeabe5d2a"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: protobuf==3.20.0 in /usr/local/lib/python3.9/dist-packages (3.20.0)\n",
"Requirement already satisfied: transformers==4.27.1 in /usr/local/lib/python3.9/dist-packages (4.27.1)\n",
"Collecting icetk\n",
" Using cached icetk-0.0.7-py3-none-any.whl (16 kB)\n",
"Requirement already satisfied: cpm_kernels in /usr/local/lib/python3.9/dist-packages (1.0.11)\n",
"Requirement already satisfied: torch in /usr/local/lib/python3.9/dist-packages (2.0.0+cu118)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.11.0 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (0.13.4)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (23.1)\n",
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (6.0)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (2022.10.31)\n",
"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (4.65.0)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (3.11.0)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (2.27.1)\n",
"Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (0.13.3)\n",
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (1.22.4)\n",
"Requirement already satisfied: torchvision in /usr/local/lib/python3.9/dist-packages (from icetk) (0.15.1+cu118)\n",
"Requirement already satisfied: sentencepiece in /usr/local/lib/python3.9/dist-packages (from icetk) (0.1.98)\n",
" Using cached icetk-0.0.6-py3-none-any.whl (15 kB)\n",
" Using cached icetk-0.0.5-py3-none-any.whl (15 kB)\n",
" Using cached icetk-0.0.4-py3-none-any.whl (15 kB)\n",
"Requirement already satisfied: sympy in /usr/local/lib/python3.9/dist-packages (from torch) (1.11.1)\n",
"Requirement already satisfied: networkx in /usr/local/lib/python3.9/dist-packages (from torch) (3.1)\n",
"Requirement already satisfied: typing-extensions in /usr/local/lib/python3.9/dist-packages (from torch) (4.5.0)\n",
"Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.9/dist-packages (from torch) (2.0.0)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.9/dist-packages (from torch) (3.1.2)\n",
"Requirement already satisfied: cmake in /usr/local/lib/python3.9/dist-packages (from triton==2.0.0->torch) (3.25.2)\n",
"Requirement already satisfied: lit in /usr/local/lib/python3.9/dist-packages (from triton==2.0.0->torch) (16.0.1)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.9/dist-packages (from jinja2->torch) (2.1.2)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.9/dist-packages (from requests->transformers==4.27.1) (2022.12.7)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests->transformers==4.27.1) (3.4)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests->transformers==4.27.1) (1.26.15)\n",
"Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.9/dist-packages (from requests->transformers==4.27.1) (2.0.12)\n",
"Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.9/dist-packages (from sympy->torch) (1.3.0)\n",
"Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.9/dist-packages (from torchvision->icetk) (8.4.0)\n",
"Installing collected packages: icetk\n",
"Successfully installed icetk-0.0.4\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Collecting fastapi\n",
" Using cached fastapi-0.95.1-py3-none-any.whl (56 kB)\n",
"Requirement already satisfied: pydantic in /usr/local/lib/python3.9/dist-packages (1.10.7)\n",
"Collecting uvicorn\n",
" Using cached uvicorn-0.21.1-py3-none-any.whl (57 kB)\n",
"Collecting sse_starlette\n",
" Using cached sse_starlette-1.3.4-py3-none-any.whl (8.9 kB)\n",
"Collecting pyngrok\n",
" Using cached pyngrok-6.0.0.tar.gz (681 kB)\n",
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
"Requirement already satisfied: nest-asyncio in /usr/local/lib/python3.9/dist-packages (1.5.6)\n",
"Collecting starlette<0.27.0,>=0.26.1\n",
" Using cached starlette-0.26.1-py3-none-any.whl (66 kB)\n",
"Requirement already satisfied: typing-extensions>=4.2.0 in /usr/local/lib/python3.9/dist-packages (from pydantic) (4.5.0)\n",
"Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.9/dist-packages (from uvicorn) (8.1.3)\n",
"Collecting h11>=0.8\n",
" Using cached h11-0.14.0-py3-none-any.whl (58 kB)\n",
"Requirement already satisfied: PyYAML in /usr/local/lib/python3.9/dist-packages (from pyngrok) (6.0)\n",
"Requirement already satisfied: anyio<5,>=3.4.0 in /usr/local/lib/python3.9/dist-packages (from starlette<0.27.0,>=0.26.1->fastapi) (3.6.2)\n",
"Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.9/dist-packages (from anyio<5,>=3.4.0->starlette<0.27.0,>=0.26.1->fastapi) (1.3.0)\n",
"Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.9/dist-packages (from anyio<5,>=3.4.0->starlette<0.27.0,>=0.26.1->fastapi) (3.4)\n",
"Building wheels for collected packages: pyngrok\n",
" Building wheel for pyngrok (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
" Created wheel for pyngrok: filename=pyngrok-6.0.0-py3-none-any.whl size=19879 sha256=4158360e5a2c7bc1519a6b2a9b98e8595d14000bc0d5a60320ce0ea422ad04d4\n",
" Stored in directory: /root/.cache/pip/wheels/31/49/9c/44b13823eb256a3b4dff34b972f7a3c7d9910bfef269e59bd7\n",
"Successfully built pyngrok\n",
"Installing collected packages: pyngrok, h11, uvicorn, starlette, sse_starlette, fastapi\n",
"Successfully installed fastapi-0.95.1 h11-0.14.0 pyngrok-6.0.0 sse_starlette-1.3.4 starlette-0.26.1 uvicorn-0.21.1\n"
]
}
],
"source": [
"import locale\n",
"locale.getpreferredencoding = lambda: \"UTF-8\"\n",
"\n",
"!pip install protobuf==3.20.0 transformers==4.27.1 icetk cpm_kernels torch\n",
"!pip install fastapi pydantic uvicorn sse_starlette pyngrok nest-asyncio"
]
},
{
"cell_type": "markdown",
"source": [
"## 环境配置"
],
"metadata": {
"id": "VaY7_gNOzdoG"
}
},
{
"cell_type": "code",
"source": [
"chatglm_models = [\n",
" \"THUDM/chatglm-6b\", # 原始模型\n",
" \"THUDM/chatglm-6b-int8\", # int8 量化\n",
" \"THUDM/chatglm-6b-int4\", # int4 量化\n",
"]\n",
"\n",
"CHATGLM_MODEL = \"THUDM/chatglm-6b-int4\"\n",
"\n",
"# GPU/CPU\n",
"RUNNING_DEVICE = \"GPU\"\n",
"\n",
"# API_TOKEN\n",
"TOKEN = \"token1\"\n"
],
"metadata": {
"id": "JJOlpnVOyIeG"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## 启动模型"
],
"metadata": {
"id": "CFzHRCMYz08N"
}
},
{
"cell_type": "code",
"source": [
"from transformers import AutoModel, AutoTokenizer\n",
"\n",
"def init_chatglm(model_name: str, running_device: str):\n",
" tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n",
" model = AutoModel.from_pretrained(model_name, trust_remote_code=True)\n",
"\n",
" if running_device == \"GPU\":\n",
" model = model.half().cuda()\n",
" else:\n",
" model = model.float()\n",
" model.eval()\n",
" return tokenizer, model\n",
"\n",
"tokenizer, model = init_chatglm(CHATGLM_MODEL, RUNNING_DEVICE)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 258,
"referenced_widgets": [
"d5e90ca28fa84e449654b3410fa1d9b1",
"429ac883ced94e6a8523c88425a8375a",
"76ddb3d32b5847a18c45d8ddcfab1a41",
"515786e0edf2437c802f0d15a32ef9fc",
"30bfe0086adc4c65b559628053efd235",
"a4f8604338bb46818873e0268e24e3a7",
"d501bf48c1304e8d87b34cb871e29142",
"9279389623ca4f2eaf6efd70df01ba8f",
"9e087b2067e548bda20691817c3f5189",
"0ff9a42d48334182b9b59a304c29b702",
"920ce7efef9d4770b566010b556c367f"
]
},
"id": "VIT1nTv_1a4X",
"outputId": "1f332557-080b-4bac-ac10-720208568a1e"
},
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.\n",
"Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.\n",
"Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading pytorch_model.bin: 0%| | 0.00/3.89G [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "d5e90ca28fa84e449654b3410fa1d9b1"
}
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"No compiled kernel found.\n",
"Compiling kernels : /root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/e02ba894cf18f3fd9b2526c795f983683c4ec732/quantization_kernels.c\n",
"Compiling gcc -O3 -fPIC -std=c99 /root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/e02ba894cf18f3fd9b2526c795f983683c4ec732/quantization_kernels.c -shared -o /root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/e02ba894cf18f3fd9b2526c795f983683c4ec732/quantization_kernels.so\n",
"Load kernel : /root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/e02ba894cf18f3fd9b2526c795f983683c4ec732/quantization_kernels.so\n",
"Using quantization cache\n",
"Applying quantization to glm layers\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## 测试模型"
],
"metadata": {
"id": "b5RuRcsD3hPw"
}
},
{
"cell_type": "code",
"source": [
"response, history = model.chat(tokenizer, \"你好\", history=[])\n",
"print(response)\n",
"print(history)\n",
"response, history = model.chat(tokenizer, \"很高兴认识你\", history=history)\n",
"print(response)\n",
"print(history)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "ZFY3ju-N3gk1",
"outputId": "889514c3-f5e7-4ae6-d0ba-99ac3c9d1df0"
},
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"WARNING:transformers_modules.THUDM.chatglm-6b-int4.e02ba894cf18f3fd9b2526c795f983683c4ec732.modeling_chatglm:The dtype of attention mask (torch.int64) is not bool\n"
]
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"你好👋!我是人工智能助手 ChatGLM-6B,很高兴见到你,欢迎问我任何问题。\n",
"[('你好', '你好👋!我是人工智能助手 ChatGLM-6B,很高兴见到你,欢迎问我任何问题。')]\n",
"谢谢你的夸奖,我很高兴能够和你交流。如果你有任何问题或需要帮助,请随时告诉我,我会尽力回答和帮助。\n",
"[('你好', '你好👋!我是人工智能助手 ChatGLM-6B,很高兴见到你,欢迎问我任何问题。'), ('很高兴认识你', '谢谢你的夸奖,我很高兴能够和你交流。如果你有任何问题或需要帮助,请随时告诉我,我会尽力回答和帮助。')]\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## 启动服务器(with tunnel)"
],
"metadata": {
"id": "LnexGhqK39NJ"
}
},
{
"cell_type": "code",
"source": [
"import torch\n",
"from fastapi import FastAPI, Request, status, HTTPException\n",
"from fastapi.responses import JSONResponse\n",
"from pydantic import BaseModel\n",
"from sse_starlette.sse import EventSourceResponse\n",
"from fastapi.middleware.cors import CORSMiddleware\n",
"import uvicorn\n",
"import json\n",
"from typing import List, Optional\n",
"\n",
"\n",
"# 参考 https://github.com/josStorer/selfhostedAI/blob/master/main.py\n",
"\n",
"def torch_gc():\n",
" if torch.cuda.is_available():\n",
" with torch.cuda.device(0):\n",
" torch.cuda.empty_cache()\n",
" torch.cuda.ipc_collect()\n",
"\n",
"\n",
"app = FastAPI()\n",
"\n",
"app.add_middleware(\n",
" CORSMiddleware,\n",
" allow_origins=['*'],\n",
" allow_credentials=True,\n",
" allow_methods=['*'],\n",
" allow_headers=['*'],\n",
")\n",
"\n",
"\n",
"class Message(BaseModel):\n",
" role: str\n",
" content: str\n",
"\n",
"\n",
"class Body(BaseModel):\n",
" messages: List[Message]\n",
" model: str\n",
" stream: Optional[bool] = False\n",
" max_tokens: Optional[int] = 256\n",
" temperature: Optional[float] = 0.95\n",
" top_p: Optional[float] = 0.7\n",
"\n",
"\n",
"\n",
"@app.get(\"/\")\n",
"def read_root():\n",
" return {\"Hello\": \"World!\"}\n",
"\n",
"@app.get(\"/v1/models\")\n",
"def get_models():\n",
" return {\"data\": [\n",
" {\n",
" \"created\": 1677610602,\n",
" \"id\": \"gpt-3.5-turbo\",\n",
" \"object\": \"model\",\n",
" \"owned_by\": \"openai\",\n",
" \"permission\": [\n",
" {\n",
" \"created\": 1680818747,\n",
" \"id\": \"modelperm-fTUZTbzFp7uLLTeMSo9ks6oT\",\n",
" \"object\": \"model_permission\",\n",
" \"allow_create_engine\": False,\n",
" \"allow_sampling\": True,\n",
" \"allow_logprobs\": True,\n",
" \"allow_search_indices\": False,\n",
" \"allow_view\": True,\n",
" \"allow_fine_tuning\": False,\n",
" \"organization\": \"*\",\n",
" \"group\": None,\n",
" \"is_blocking\": False\n",
" }\n",
" ],\n",
" \"root\": \"gpt-3.5-turbo\",\n",
" \"parent\": None,\n",
" },\n",
" ],\n",
" \"object\": \"list\"\n",
" }\n",
"\n",
"def generate_response(content: str):\n",
" return {\n",
" \"id\": \"chatcmpl-77PZm95TtxE0oYLRx3cxa6HtIDI7s\",\n",
" \"object\": \"chat.completion\",\n",
" \"created\": 1682000966,\n",
" \"model\": \"gpt-3.5-turbo-0301\",\n",
" \"usage\": {\n",
" \"prompt_tokens\": 10,\n",
" \"completion_tokens\": 10,\n",
" \"total_tokens\": 20,\n",
" },\n",
" \"choices\": [{\n",
" \"message\": {\"role\": \"assistant\", \"content\": content}, \"finish_reason\": \"stop\", \"index\": 0}\n",
" ]\n",
" }\n",
"\n",
"def generate_stream_response_start():\n",
" return {\"id\":\"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\",\"object\":\"chat.completion.chunk\",\"created\":1682004627,\"model\":\"gpt-3.5-turbo-0301\",\"choices\":[{\"delta\":{\"role\":\"assistant\"},\"index\":0,\"finish_reason\":None}]}\n",
"\n",
"def generate_stream_response(content: str):\n",
" return {\n",
" \"id\":\"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\",\n",
" \"object\":\"chat.completion.chunk\",\n",
" \"created\":1682004627,\n",
" \"model\":\"gpt-3.5-turbo-0301\",\n",
" \"choices\":[{\"delta\":{\"content\":content},\"index\":0,\"finish_reason\":None}\n",
" ]}\n",
"\n",
"def generate_stream_response_stop():\n",
" return {\"id\":\"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\",\"object\":\"chat.completion.chunk\",\"created\":1682004627,\"model\":\"gpt-3.5-turbo-0301\",\"choices\":[{\"delta\":{},\"index\":0,\"finish_reason\":\"stop\"}]}\n",
"\n",
"@app.post(\"/v1/chat/completions\")\n",
"async def completions(body: Body, request: Request):\n",
"    auth_header = request.headers.get(\"Authorization\", \"\")\n",
"    if not auth_header.startswith(\"Bearer \") or auth_header.split(\" \", 1)[1] != TOKEN:\n",
" raise HTTPException(status.HTTP_401_UNAUTHORIZED, \"Token is wrong!\")\n",
" \n",
" torch_gc()\n",
"\n",
" question = body.messages[-1]\n",
" if question.role == 'user':\n",
" question = question.content\n",
" else:\n",
" raise HTTPException(status.HTTP_400_BAD_REQUEST, \"No Question Found\")\n",
"\n",
" history = []\n",
" user_question = ''\n",
" for message in body.messages:\n",
" if message.role == 'system':\n",
" history.append((message.content, \"OK\"))\n",
" if message.role == 'user':\n",
" user_question = message.content\n",
" elif message.role == 'assistant':\n",
" assistant_answer = message.content\n",
" history.append((user_question, assistant_answer))\n",
"\n",
" print(f\"question = {question}, history = {history}\")\n",
"\n",
" \n",
" if body.stream:\n",
" async def eval_chatglm():\n",
" sends = 0\n",
" first = True\n",
" for response, _ in model.stream_chat(\n",
" tokenizer, question, history,\n",
" temperature=body.temperature,\n",
" top_p=body.top_p,\n",
" max_length=max(2048, body.max_tokens)):\n",
" if await request.is_disconnected():\n",
" return\n",
" ret = response[sends:]\n",
" sends = len(response)\n",
" if first:\n",
" first = False\n",
" yield json.dumps(generate_stream_response_start(), ensure_ascii=False)\n",
" yield json.dumps(generate_stream_response(ret), ensure_ascii=False)\n",
" yield json.dumps(generate_stream_response_stop(), ensure_ascii=False)\n",
" yield \"[DONE]\"\n",
" return EventSourceResponse(eval_chatglm(), ping=10000)\n",
" else:\n",
" response, _ = model.chat(\n",
" tokenizer, question, history,\n",
" temperature=body.temperature,\n",
" top_p=body.top_p,\n",
" max_length=max(2048, body.max_tokens))\n",
" print(f\"response: {response}\")\n",
" return JSONResponse(content=generate_response(response))"
],
"metadata": {
"id": "tX5oiLQJ4BTX"
},
"execution_count": 5,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# 在 Notebook 中运行所需\n",
"import nest_asyncio\n",
"nest_asyncio.apply()\n",
"\n",
"from pyngrok import ngrok, conf\n",
"\n",
"# ngrok.set_auth_token(os.environ[\"ngrok_token\"])\n",
"http_tunnel = ngrok.connect(8000)\n",
"print(http_tunnel.public_url)\n",
"\n",
"uvicorn.run(app, port=8000)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "6bPIXXdn8dG0",
"outputId": "78e1223c-b076-4183-e161-39bb1fa73d5f"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": []
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"WARNING:pyngrok.process.ngrok:t=2023-04-20T16:22:00+0000 lvl=warn msg=\"ngrok config file found at legacy location, move to XDG location\" xdg_path=/root/.config/ngrok/ngrok.yml legacy_path=/root/.ngrok2/ngrok.yml\n",
"Exception in thread Thread-11:\n",
"Traceback (most recent call last):\n",
" File \"/usr/lib/python3.9/threading.py\", line 980, in _bootstrap_inner\n",
" self.run()\n",
" File \"/usr/lib/python3.9/threading.py\", line 917, in run\n",
" self._target(*self._args, **self._kwargs)\n",
" File \"/usr/local/lib/python3.9/dist-packages/pyngrok/process.py\", line 146, in _monitor_process\n",
" self._log_line(self.proc.stdout.readline())\n",
" File \"/usr/lib/python3.9/encodings/ascii.py\", line 26, in decode\n",
" return codecs.ascii_decode(input, self.errors)[0]\n",
"UnicodeDecodeError: 'ascii' codec can't decode byte 0xc2 in position 184: ordinal not in range(128)\n",
"INFO: Started server process [743]\n",
"INFO: Waiting for application startup.\n",
"INFO: Application startup complete.\n",
"INFO: Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)\n"
]
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"https://ca63-35-202-217-147.ngrok.io\n",
"INFO: 2400:56a0:1b2:1eab:7fc8:dc38:f0d7:a0db:0 - \"GET /v1/models HTTP/1.1\" 200 OK\n",
"question = 用Python写一个访问Twitter最新推文的脚本, history = []\n",
"INFO: 2400:56a0:1b2:1eab:7fc8:dc38:f0d7:a0db:0 - \"POST /v1/chat/completions HTTP/1.1\" 200 OK\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"## 访问\n",
"\n",
"### 非流式接口\n",
"\n",
"```bash\n",
"curl -vvv https://6d8f-130-211-208-193.ngrok.io/v1/chat/completions \\\n",
" -H \"Content-Type: application/json\" \\\n",
" -H \"Authorization: Bearer token1\" \\\n",
" -d '{ \"stream\": false,\n",
" \"model\": \"gpt-3.5-turbo\",\n",
" \"messages\": [{\"role\": \"user\", \"content\": \"写一首夏天的诗\"}]\n",
" }'\n",
"```\n",
"\n",
"输出:\n",
"```json\n",
"{\"id\":\"chatcmpl-77PZm95TtxE0oYLRx3cxa6HtIDI7s\",\"object\":\"chat.completion\",\"created\":1682000966,\"model\":\"gpt-3.5-turbo-0301\",\"usage\":{\"prompt_tokens\":10,\"completion_tokens\":10,\"total_tokens\":20},\"choices\":[{\"message\":{\"role\":\"assistant\",\"content\":\"夏日的阳光下,\\n树叶闪烁着翠绿的光芒,\\n蝉鸣声不断响起,\\n伴着鸟儿的欢快歌唱。\\n\\n人们穿着轻便的衣服,\\n享受这清凉的夏日时光,\\n在海滩上晒着太阳,\\n喝着清凉的饮料,\\n聊天、欢笑、无忧无虑。\\n\\n清晨的日出,\\n天边呈现出美丽的红色,\\n太阳慢慢地升起,\\n照耀着整个天空。\\n\\n在公园里漫步,\\n欣赏着花草树木的美丽,\\n夏日的天空和大地,\\n让人感到无限的快乐。\\n\\n夏日的风吹过,\\n带来了凉爽的感觉,\\n让人感受到生命的美好,\\n让人感受到夏日的热情。\\n\\n这是一个美好的季节,\\n一个充满欢乐和热情的夏日,\\n让我们珍惜这美好的时光,\\n享受这夏日带来的快乐。\"},\"finish_reason\":\"stop\",\"index\":0}]}\n",
"```\n",
"\n",
"### 流式接口\n",
"\n",
"```bash\n",
"curl -vvv https://6d8f-130-211-208-193.ngrok.io/v1/chat/completions \\\n",
" -H \"Content-Type: application/json\" \\\n",
" -H \"Authorization: Bearer token1\" \\\n",
" -d '{ \"stream\": true, \n",
" \"model\": \"gpt-3.5-turbo\",\n",
" \"messages\": [{\"role\": \"user\", \"content\": \"写一首夏天的诗\"}]\n",
" }'\n",
"```\n",
"\n",
"输出:\n",
"```json\n",
"\n",
"data: {\"id\": \"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\", \"object\": \"chat.completion.chunk\", \"created\": 1682004627, \"model\": \"gpt-3.5-turbo-0301\", \"choices\": [{\"delta\": {\"content\": \"难忘的\"}, \"index\": 0, \"finish_reason\": null}]}\n",
"\n",
"data: {\"id\": \"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\", \"object\": \"chat.completion.chunk\", \"created\": 1682004627, \"model\": \"gpt-3.5-turbo-0301\", \"choices\": [{\"delta\": {\"content\": \"夏日\"}, \"index\": 0, \"finish_reason\": null}]}\n",
"\n",
"data: {\"id\": \"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\", \"object\": \"chat.completion.chunk\", \"created\": 1682004627, \"model\": \"gpt-3.5-turbo-0301\", \"choices\": [{\"delta\": {\"content\": \"时光\"}, \"index\": 0, \"finish_reason\": null}]}\n",
"\n",
"data: {\"id\": \"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\", \"object\": \"chat.completion.chunk\", \"created\": 1682004627, \"model\": \"gpt-3.5-turbo-0301\", \"choices\": [{\"delta\": {\"content\": \"。\"}, \"index\": 0, \"finish_reason\": null}]}\n",
"\n",
"data: {\"id\": \"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\", \"object\": \"chat.completion.chunk\", \"created\": 1682004627, \"model\": \"gpt-3.5-turbo-0301\", \"choices\": [{\"delta\": {}, \"index\": 0, \"finish_reason\": \"stop\"}]}\n",
"\n",
"data: [DONE]\n",
"```\n"
],
"metadata": {
"id": "xd6IIdSaIUhr"
}
},
{
"cell_type": "code",
"source": [
"# tips: colab 上 uvicorn的流式输出只有第一次运行notebook的时候才会有效,所以调试的时候可以用 exit()来强制重启 notebook (不删除运行时,从而避免重新下载模型文件)\n",
"\n",
"exit()"
],
"metadata": {
"id": "NJbR4SKIT4Hc"
},
"execution_count": null,
"outputs": []
}
]
}