-
-
Save KevinZonda/b1fb98e213760ddf7352a3aeced09fa3 to your computer and use it in GitHub Desktop.
chatglm-openai-api.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [] | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
}, | |
"accelerator": "GPU", | |
"gpuClass": "standard", | |
"widgets": { | |
"application/vnd.jupyter.widget-state+json": { | |
"d5e90ca28fa84e449654b3410fa1d9b1": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HBoxModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HBoxView", | |
"box_style": "", | |
"children": [ | |
"IPY_MODEL_429ac883ced94e6a8523c88425a8375a", | |
"IPY_MODEL_76ddb3d32b5847a18c45d8ddcfab1a41", | |
"IPY_MODEL_515786e0edf2437c802f0d15a32ef9fc" | |
], | |
"layout": "IPY_MODEL_30bfe0086adc4c65b559628053efd235" | |
} | |
}, | |
"429ac883ced94e6a8523c88425a8375a": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HTMLModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HTMLView", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_a4f8604338bb46818873e0268e24e3a7", | |
"placeholder": "", | |
"style": "IPY_MODEL_d501bf48c1304e8d87b34cb871e29142", | |
"value": "Downloading pytorch_model.bin: 100%" | |
} | |
}, | |
"76ddb3d32b5847a18c45d8ddcfab1a41": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "FloatProgressModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "ProgressView", | |
"bar_style": "success", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_9279389623ca4f2eaf6efd70df01ba8f", | |
"max": 3893083075, | |
"min": 0, | |
"orientation": "horizontal", | |
"style": "IPY_MODEL_9e087b2067e548bda20691817c3f5189", | |
"value": 3893083075 | |
} | |
}, | |
"515786e0edf2437c802f0d15a32ef9fc": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_dom_classes": [], | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "HTMLModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/controls", | |
"_view_module_version": "1.5.0", | |
"_view_name": "HTMLView", | |
"description": "", | |
"description_tooltip": null, | |
"layout": "IPY_MODEL_0ff9a42d48334182b9b59a304c29b702", | |
"placeholder": "", | |
"style": "IPY_MODEL_920ce7efef9d4770b566010b556c367f", | |
"value": " 3.89G/3.89G [01:25<00:00, 60.6MB/s]" | |
} | |
}, | |
"30bfe0086adc4c65b559628053efd235": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"a4f8604338bb46818873e0268e24e3a7": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"d501bf48c1304e8d87b34cb871e29142": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "DescriptionStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"description_width": "" | |
} | |
}, | |
"9279389623ca4f2eaf6efd70df01ba8f": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"9e087b2067e548bda20691817c3f5189": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "ProgressStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"bar_color": null, | |
"description_width": "" | |
} | |
}, | |
"0ff9a42d48334182b9b59a304c29b702": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.2.0", | |
"_model_name": "LayoutModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "LayoutView", | |
"align_content": null, | |
"align_items": null, | |
"align_self": null, | |
"border": null, | |
"bottom": null, | |
"display": null, | |
"flex": null, | |
"flex_flow": null, | |
"grid_area": null, | |
"grid_auto_columns": null, | |
"grid_auto_flow": null, | |
"grid_auto_rows": null, | |
"grid_column": null, | |
"grid_gap": null, | |
"grid_row": null, | |
"grid_template_areas": null, | |
"grid_template_columns": null, | |
"grid_template_rows": null, | |
"height": null, | |
"justify_content": null, | |
"justify_items": null, | |
"left": null, | |
"margin": null, | |
"max_height": null, | |
"max_width": null, | |
"min_height": null, | |
"min_width": null, | |
"object_fit": null, | |
"object_position": null, | |
"order": null, | |
"overflow": null, | |
"overflow_x": null, | |
"overflow_y": null, | |
"padding": null, | |
"right": null, | |
"top": null, | |
"visibility": null, | |
"width": null | |
} | |
}, | |
"920ce7efef9d4770b566010b556c367f": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_model_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_model_name": "DescriptionStyleModel", | |
"_view_count": null, | |
"_view_module": "@jupyter-widgets/base", | |
"_view_module_version": "1.2.0", | |
"_view_name": "StyleView", | |
"description_width": "" | |
} | |
} | |
} | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## 安装依赖" | |
], | |
"metadata": { | |
"id": "MZyEDzEHyDcN" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "eNbQgYuLxp-3", | |
"outputId": "179d7305-a7d0-4da2-a2f0-ef8eeabe5d2a" | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", | |
"Requirement already satisfied: protobuf==3.20.0 in /usr/local/lib/python3.9/dist-packages (3.20.0)\n", | |
"Requirement already satisfied: transformers==4.27.1 in /usr/local/lib/python3.9/dist-packages (4.27.1)\n", | |
"Collecting icetk\n", | |
" Using cached icetk-0.0.7-py3-none-any.whl (16 kB)\n", | |
"Requirement already satisfied: cpm_kernels in /usr/local/lib/python3.9/dist-packages (1.0.11)\n", | |
"Requirement already satisfied: torch in /usr/local/lib/python3.9/dist-packages (2.0.0+cu118)\n", | |
"Requirement already satisfied: huggingface-hub<1.0,>=0.11.0 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (0.13.4)\n", | |
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (23.1)\n", | |
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (6.0)\n", | |
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (2022.10.31)\n", | |
"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (4.65.0)\n", | |
"Requirement already satisfied: filelock in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (3.11.0)\n", | |
"Requirement already satisfied: requests in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (2.27.1)\n", | |
"Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (0.13.3)\n", | |
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.9/dist-packages (from transformers==4.27.1) (1.22.4)\n", | |
"Requirement already satisfied: torchvision in /usr/local/lib/python3.9/dist-packages (from icetk) (0.15.1+cu118)\n", | |
"Requirement already satisfied: sentencepiece in /usr/local/lib/python3.9/dist-packages (from icetk) (0.1.98)\n", | |
" Using cached icetk-0.0.6-py3-none-any.whl (15 kB)\n", | |
" Using cached icetk-0.0.5-py3-none-any.whl (15 kB)\n", | |
" Using cached icetk-0.0.4-py3-none-any.whl (15 kB)\n", | |
"Requirement already satisfied: sympy in /usr/local/lib/python3.9/dist-packages (from torch) (1.11.1)\n", | |
"Requirement already satisfied: networkx in /usr/local/lib/python3.9/dist-packages (from torch) (3.1)\n", | |
"Requirement already satisfied: typing-extensions in /usr/local/lib/python3.9/dist-packages (from torch) (4.5.0)\n", | |
"Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.9/dist-packages (from torch) (2.0.0)\n", | |
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.9/dist-packages (from torch) (3.1.2)\n", | |
"Requirement already satisfied: cmake in /usr/local/lib/python3.9/dist-packages (from triton==2.0.0->torch) (3.25.2)\n", | |
"Requirement already satisfied: lit in /usr/local/lib/python3.9/dist-packages (from triton==2.0.0->torch) (16.0.1)\n", | |
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.9/dist-packages (from jinja2->torch) (2.1.2)\n", | |
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.9/dist-packages (from requests->transformers==4.27.1) (2022.12.7)\n", | |
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.9/dist-packages (from requests->transformers==4.27.1) (3.4)\n", | |
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.9/dist-packages (from requests->transformers==4.27.1) (1.26.15)\n", | |
"Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.9/dist-packages (from requests->transformers==4.27.1) (2.0.12)\n", | |
"Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.9/dist-packages (from sympy->torch) (1.3.0)\n", | |
"Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.9/dist-packages (from torchvision->icetk) (8.4.0)\n", | |
"Installing collected packages: icetk\n", | |
"Successfully installed icetk-0.0.4\n", | |
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", | |
"Collecting fastapi\n", | |
" Using cached fastapi-0.95.1-py3-none-any.whl (56 kB)\n", | |
"Requirement already satisfied: pydantic in /usr/local/lib/python3.9/dist-packages (1.10.7)\n", | |
"Collecting uvicorn\n", | |
" Using cached uvicorn-0.21.1-py3-none-any.whl (57 kB)\n", | |
"Collecting sse_starlette\n", | |
" Using cached sse_starlette-1.3.4-py3-none-any.whl (8.9 kB)\n", | |
"Collecting pyngrok\n", | |
" Using cached pyngrok-6.0.0.tar.gz (681 kB)\n", | |
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
"Requirement already satisfied: nest-asyncio in /usr/local/lib/python3.9/dist-packages (1.5.6)\n", | |
"Collecting starlette<0.27.0,>=0.26.1\n", | |
" Using cached starlette-0.26.1-py3-none-any.whl (66 kB)\n", | |
"Requirement already satisfied: typing-extensions>=4.2.0 in /usr/local/lib/python3.9/dist-packages (from pydantic) (4.5.0)\n", | |
"Requirement already satisfied: click>=7.0 in /usr/local/lib/python3.9/dist-packages (from uvicorn) (8.1.3)\n", | |
"Collecting h11>=0.8\n", | |
" Using cached h11-0.14.0-py3-none-any.whl (58 kB)\n", | |
"Requirement already satisfied: PyYAML in /usr/local/lib/python3.9/dist-packages (from pyngrok) (6.0)\n", | |
"Requirement already satisfied: anyio<5,>=3.4.0 in /usr/local/lib/python3.9/dist-packages (from starlette<0.27.0,>=0.26.1->fastapi) (3.6.2)\n", | |
"Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.9/dist-packages (from anyio<5,>=3.4.0->starlette<0.27.0,>=0.26.1->fastapi) (1.3.0)\n", | |
"Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.9/dist-packages (from anyio<5,>=3.4.0->starlette<0.27.0,>=0.26.1->fastapi) (3.4)\n", | |
"Building wheels for collected packages: pyngrok\n", | |
" Building wheel for pyngrok (setup.py) ... \u001b[?25l\u001b[?25hdone\n", | |
" Created wheel for pyngrok: filename=pyngrok-6.0.0-py3-none-any.whl size=19879 sha256=4158360e5a2c7bc1519a6b2a9b98e8595d14000bc0d5a60320ce0ea422ad04d4\n", | |
" Stored in directory: /root/.cache/pip/wheels/31/49/9c/44b13823eb256a3b4dff34b972f7a3c7d9910bfef269e59bd7\n", | |
"Successfully built pyngrok\n", | |
"Installing collected packages: pyngrok, h11, uvicorn, starlette, sse_starlette, fastapi\n", | |
"Successfully installed fastapi-0.95.1 h11-0.14.0 pyngrok-6.0.0 sse_starlette-1.3.4 starlette-0.26.1 uvicorn-0.21.1\n" | |
] | |
} | |
], | |
"source": [ | |
"import locale\n", | |
"locale.getpreferredencoding = lambda: \"UTF-8\"\n", | |
"\n", | |
"!pip install protobuf==3.20.0 transformers==4.27.1 icetk cpm_kernels torch\n", | |
"!pip install fastapi pydantic uvicorn sse_starlette pyngrok nest-asyncio" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## 环境配置" | |
], | |
"metadata": { | |
"id": "VaY7_gNOzdoG" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"chatglm_models = [\n", | |
" \"THUDM/chatglm-6b\", # 原始模型\n", | |
" \"THUDM/chatglm-6b-int8\", # int8 量化\n", | |
" \"THUDM/chatglm-6b-int4\", # int4 量化\n", | |
"]\n", | |
"\n", | |
"CHATGLM_MODEL = \"THUDM/chatglm-6b-int4\"\n", | |
"\n", | |
"# GPU/CPU\n", | |
"RUNNING_DEVICE = \"GPU\"\n", | |
"\n", | |
"# API_TOKEN\n", | |
"TOKEN = \"token1\"\n" | |
], | |
"metadata": { | |
"id": "JJOlpnVOyIeG" | |
}, | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## 启动模型" | |
], | |
"metadata": { | |
"id": "CFzHRCMYz08N" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from transformers import AutoModel, AutoTokenizer\n", | |
"\n", | |
"def init_chatglm(model_name: str, running_device: str):\n", | |
" tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)\n", | |
" model = AutoModel.from_pretrained(model_name, trust_remote_code=True)\n", | |
"\n", | |
" if running_device == \"GPU\":\n", | |
" model = model.half().cuda()\n", | |
" else:\n", | |
" model = model.float()\n", | |
" model.eval()\n", | |
" return tokenizer, model\n", | |
"\n", | |
"tokenizer, model = init_chatglm(CHATGLM_MODEL, RUNNING_DEVICE)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 258, | |
"referenced_widgets": [ | |
"d5e90ca28fa84e449654b3410fa1d9b1", | |
"429ac883ced94e6a8523c88425a8375a", | |
"76ddb3d32b5847a18c45d8ddcfab1a41", | |
"515786e0edf2437c802f0d15a32ef9fc", | |
"30bfe0086adc4c65b559628053efd235", | |
"a4f8604338bb46818873e0268e24e3a7", | |
"d501bf48c1304e8d87b34cb871e29142", | |
"9279389623ca4f2eaf6efd70df01ba8f", | |
"9e087b2067e548bda20691817c3f5189", | |
"0ff9a42d48334182b9b59a304c29b702", | |
"920ce7efef9d4770b566010b556c367f" | |
] | |
}, | |
"id": "VIT1nTv_1a4X", | |
"outputId": "1f332557-080b-4bac-ac10-720208568a1e" | |
}, | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.\n", | |
"Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.\n", | |
"Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.\n" | |
] | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"Downloading pytorch_model.bin: 0%| | 0.00/3.89G [00:00<?, ?B/s]" | |
], | |
"application/vnd.jupyter.widget-view+json": { | |
"version_major": 2, | |
"version_minor": 0, | |
"model_id": "d5e90ca28fa84e449654b3410fa1d9b1" | |
} | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"No compiled kernel found.\n", | |
"Compiling kernels : /root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/e02ba894cf18f3fd9b2526c795f983683c4ec732/quantization_kernels.c\n", | |
"Compiling gcc -O3 -fPIC -std=c99 /root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/e02ba894cf18f3fd9b2526c795f983683c4ec732/quantization_kernels.c -shared -o /root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/e02ba894cf18f3fd9b2526c795f983683c4ec732/quantization_kernels.so\n", | |
"Load kernel : /root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/e02ba894cf18f3fd9b2526c795f983683c4ec732/quantization_kernels.so\n", | |
"Using quantization cache\n", | |
"Applying quantization to glm layers\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## 测试模型" | |
], | |
"metadata": { | |
"id": "b5RuRcsD3hPw" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"response, history = model.chat(tokenizer, \"你好\", history=[])\n", | |
"print(response)\n", | |
"print(history)\n", | |
"response, history = model.chat(tokenizer, \"很高兴认识你\", history=history)\n", | |
"print(response)\n", | |
"print(history)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "ZFY3ju-N3gk1", | |
"outputId": "889514c3-f5e7-4ae6-d0ba-99ac3c9d1df0" | |
}, | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"WARNING:transformers_modules.THUDM.chatglm-6b-int4.e02ba894cf18f3fd9b2526c795f983683c4ec732.modeling_chatglm:The dtype of attention mask (torch.int64) is not bool\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"你好👋!我是人工智能助手 ChatGLM-6B,很高兴见到你,欢迎问我任何问题。\n", | |
"[('你好', '你好👋!我是人工智能助手 ChatGLM-6B,很高兴见到你,欢迎问我任何问题。')]\n", | |
"谢谢你的夸奖,我很高兴能够和你交流。如果你有任何问题或需要帮助,请随时告诉我,我会尽力回答和帮助。\n", | |
"[('你好', '你好👋!我是人工智能助手 ChatGLM-6B,很高兴见到你,欢迎问我任何问题。'), ('很高兴认识你', '谢谢你的夸奖,我很高兴能够和你交流。如果你有任何问题或需要帮助,请随时告诉我,我会尽力回答和帮助。')]\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## 启动服务器(with tunnel)" | |
], | |
"metadata": { | |
"id": "LnexGhqK39NJ" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import torch\n", | |
"from fastapi import FastAPI, Request, status, HTTPException\n", | |
"from fastapi.responses import JSONResponse\n", | |
"from pydantic import BaseModel\n", | |
"from sse_starlette.sse import EventSourceResponse\n", | |
"from fastapi.middleware.cors import CORSMiddleware\n", | |
"import uvicorn\n", | |
"import json\n", | |
"from typing import List, Optional\n", | |
"\n", | |
"\n", | |
"# 参考 https://github.com/josStorer/selfhostedAI/blob/master/main.py\n", | |
"\n", | |
"def torch_gc():\n", | |
" if torch.cuda.is_available():\n", | |
" with torch.cuda.device(0):\n", | |
" torch.cuda.empty_cache()\n", | |
" torch.cuda.ipc_collect()\n", | |
"\n", | |
"\n", | |
"app = FastAPI()\n", | |
"\n", | |
"app.add_middleware(\n", | |
" CORSMiddleware,\n", | |
" allow_origins=['*'],\n", | |
" allow_credentials=True,\n", | |
" allow_methods=['*'],\n", | |
" allow_headers=['*'],\n", | |
")\n", | |
"\n", | |
"\n", | |
"class Message(BaseModel):\n", | |
" role: str\n", | |
" content: str\n", | |
"\n", | |
"\n", | |
"class Body(BaseModel):\n", | |
" messages: List[Message]\n", | |
" model: str\n", | |
" stream: Optional[bool] = False\n", | |
" max_tokens: Optional[int] = 256\n", | |
" temperature: Optional[float] = 0.95\n", | |
" top_p: Optional[float] = 0.7\n", | |
"\n", | |
"\n", | |
"\n", | |
"@app.get(\"/\")\n", | |
"def read_root():\n", | |
" return {\"Hello\": \"World!\"}\n", | |
"\n", | |
"@app.get(\"/v1/models\")\n", | |
"def get_models():\n", | |
" return {\"data\": [\n", | |
" {\n", | |
" \"created\": 1677610602,\n", | |
" \"id\": \"gpt-3.5-turbo\",\n", | |
" \"object\": \"model\",\n", | |
" \"owned_by\": \"openai\",\n", | |
" \"permission\": [\n", | |
" {\n", | |
" \"created\": 1680818747,\n", | |
" \"id\": \"modelperm-fTUZTbzFp7uLLTeMSo9ks6oT\",\n", | |
" \"object\": \"model_permission\",\n", | |
" \"allow_create_engine\": False,\n", | |
" \"allow_sampling\": True,\n", | |
" \"allow_logprobs\": True,\n", | |
" \"allow_search_indices\": False,\n", | |
" \"allow_view\": True,\n", | |
" \"allow_fine_tuning\": False,\n", | |
" \"organization\": \"*\",\n", | |
" \"group\": None,\n", | |
" \"is_blocking\": False\n", | |
" }\n", | |
" ],\n", | |
" \"root\": \"gpt-3.5-turbo\",\n", | |
" \"parent\": None,\n", | |
" },\n", | |
" ],\n", | |
" \"object\": \"list\"\n", | |
" }\n", | |
"\n", | |
"def generate_response(content: str):\n", | |
" return {\n", | |
" \"id\": \"chatcmpl-77PZm95TtxE0oYLRx3cxa6HtIDI7s\",\n", | |
" \"object\": \"chat.completion\",\n", | |
" \"created\": 1682000966,\n", | |
" \"model\": \"gpt-3.5-turbo-0301\",\n", | |
" \"usage\": {\n", | |
" \"prompt_tokens\": 10,\n", | |
" \"completion_tokens\": 10,\n", | |
" \"total_tokens\": 20,\n", | |
" },\n", | |
" \"choices\": [{\n", | |
" \"message\": {\"role\": \"assistant\", \"content\": content}, \"finish_reason\": \"stop\", \"index\": 0}\n", | |
" ]\n", | |
" }\n", | |
"\n", | |
"def generate_stream_response_start():\n", | |
" return {\"id\":\"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\",\"object\":\"chat.completion.chunk\",\"created\":1682004627,\"model\":\"gpt-3.5-turbo-0301\",\"choices\":[{\"delta\":{\"role\":\"assistant\"},\"index\":0,\"finish_reason\":None}]}\n", | |
"\n", | |
"def generate_stream_response(content: str):\n", | |
" return {\n", | |
" \"id\":\"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\",\n", | |
" \"object\":\"chat.completion.chunk\",\n", | |
" \"created\":1682004627,\n", | |
" \"model\":\"gpt-3.5-turbo-0301\",\n", | |
" \"choices\":[{\"delta\":{\"content\":content},\"index\":0,\"finish_reason\":None}\n", | |
" ]}\n", | |
"\n", | |
"def generate_stream_response_stop():\n", | |
" return {\"id\":\"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\",\"object\":\"chat.completion.chunk\",\"created\":1682004627,\"model\":\"gpt-3.5-turbo-0301\",\"choices\":[{\"delta\":{},\"index\":0,\"finish_reason\":\"stop\"}]}\n", | |
"\n", | |
"@app.post(\"/v1/chat/completions\")\n", | |
"async def completions(body: Body, request: Request):\n", | |
" if request.headers.get(\"Authorization\").split(\" \")[1] != TOKEN:\n", | |
" raise HTTPException(status.HTTP_401_UNAUTHORIZED, \"Token is wrong!\")\n", | |
" \n", | |
" torch_gc()\n", | |
"\n", | |
" question = body.messages[-1]\n", | |
" if question.role == 'user':\n", | |
" question = question.content\n", | |
" else:\n", | |
" raise HTTPException(status.HTTP_400_BAD_REQUEST, \"No Question Found\")\n", | |
"\n", | |
" history = []\n", | |
" user_question = ''\n", | |
" for message in body.messages:\n", | |
" if message.role == 'system':\n", | |
" history.append((message.content, \"OK\"))\n", | |
" if message.role == 'user':\n", | |
" user_question = message.content\n", | |
" elif message.role == 'assistant':\n", | |
" assistant_answer = message.content\n", | |
" history.append((user_question, assistant_answer))\n", | |
"\n", | |
" print(f\"question = {question}, history = {history}\")\n", | |
"\n", | |
" \n", | |
" if body.stream:\n", | |
" async def eval_chatglm():\n", | |
" sends = 0\n", | |
" first = True\n", | |
" for response, _ in model.stream_chat(\n", | |
" tokenizer, question, history,\n", | |
" temperature=body.temperature,\n", | |
" top_p=body.top_p,\n", | |
" max_length=max(2048, body.max_tokens)):\n", | |
" if await request.is_disconnected():\n", | |
" return\n", | |
" ret = response[sends:]\n", | |
" sends = len(response)\n", | |
" if first:\n", | |
" first = False\n", | |
" yield json.dumps(generate_stream_response_start(), ensure_ascii=False)\n", | |
" yield json.dumps(generate_stream_response(ret), ensure_ascii=False)\n", | |
" yield json.dumps(generate_stream_response_stop(), ensure_ascii=False)\n", | |
" yield \"[DONE]\"\n", | |
" return EventSourceResponse(eval_chatglm(), ping=10000)\n", | |
" else:\n", | |
" response, _ = model.chat(\n", | |
" tokenizer, question, history,\n", | |
" temperature=body.temperature,\n", | |
" top_p=body.top_p,\n", | |
" max_length=max(2048, body.max_tokens))\n", | |
" print(f\"response: {response}\")\n", | |
" return JSONResponse(content=generate_response(response))" | |
], | |
"metadata": { | |
"id": "tX5oiLQJ4BTX" | |
}, | |
"execution_count": 5, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# 在 Notebook 中运行所需\n", | |
"import nest_asyncio\n", | |
"nest_asyncio.apply()\n", | |
"\n", | |
"from pyngrok import ngrok, conf\n", | |
"\n", | |
"# ngrok.set_auth_token(os.environ[\"ngrok_token\"])\n", | |
"http_tunnel = ngrok.connect(8000)\n", | |
"print(http_tunnel.public_url)\n", | |
"\n", | |
"uvicorn.run(app, port=8000)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "6bPIXXdn8dG0", | |
"outputId": "78e1223c-b076-4183-e161-39bb1fa73d5f" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [] | |
}, | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"WARNING:pyngrok.process.ngrok:t=2023-04-20T16:22:00+0000 lvl=warn msg=\"ngrok config file found at legacy location, move to XDG location\" xdg_path=/root/.config/ngrok/ngrok.yml legacy_path=/root/.ngrok2/ngrok.yml\n", | |
"Exception in thread Thread-11:\n", | |
"Traceback (most recent call last):\n", | |
" File \"/usr/lib/python3.9/threading.py\", line 980, in _bootstrap_inner\n", | |
" self.run()\n", | |
" File \"/usr/lib/python3.9/threading.py\", line 917, in run\n", | |
" self._target(*self._args, **self._kwargs)\n", | |
" File \"/usr/local/lib/python3.9/dist-packages/pyngrok/process.py\", line 146, in _monitor_process\n", | |
" self._log_line(self.proc.stdout.readline())\n", | |
" File \"/usr/lib/python3.9/encodings/ascii.py\", line 26, in decode\n", | |
" return codecs.ascii_decode(input, self.errors)[0]\n", | |
"UnicodeDecodeError: 'ascii' codec can't decode byte 0xc2 in position 184: ordinal not in range(128)\n", | |
"INFO: Started server process [743]\n", | |
"INFO: Waiting for application startup.\n", | |
"INFO: Application startup complete.\n", | |
"INFO: Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)\n" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"https://ca63-35-202-217-147.ngrok.io\n", | |
"INFO: 2400:56a0:1b2:1eab:7fc8:dc38:f0d7:a0db:0 - \"GET /v1/models HTTP/1.1\" 200 OK\n", | |
"question = 用Python写一个访问Twitter最新推文的脚本, history = []\n", | |
"INFO: 2400:56a0:1b2:1eab:7fc8:dc38:f0d7:a0db:0 - \"POST /v1/chat/completions HTTP/1.1\" 200 OK\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## 访问\n", | |
"\n", | |
"### 非流式接口\n", | |
"\n", | |
"```bash\n", | |
"curl -vvv https://6d8f-130-211-208-193.ngrok.io/v1/chat/completions \\\n", | |
" -H \"Content-Type: application/json\" \\\n", | |
" -H \"Authorization: Bearer token1\" \\\n", | |
" -d '{ \"stream\": false,\n", | |
" \"model\": \"gpt-3.5-turbo\",\n", | |
" \"messages\": [{\"role\": \"user\", \"content\": \"写一首夏天的诗\"}]\n", | |
" }'\n", | |
"```\n", | |
"\n", | |
"输出:\n", | |
"```json\n", | |
"{\"id\":\"chatcmpl-77PZm95TtxE0oYLRx3cxa6HtIDI7s\",\"object\":\"chat.completion\",\"created\":1682000966,\"model\":\"gpt-3.5-turbo-0301\",\"usage\":{\"prompt_tokens\":10,\"completion_tokens\":10,\"total_tokens\":20},\"choices\":[{\"message\":{\"role\":\"assistant\",\"content\":\"夏日的阳光下,\\n树叶闪烁着翠绿的光芒,\\n蝉鸣声不断响起,\\n伴着鸟儿的欢快歌唱。\\n\\n人们穿着轻便的衣服,\\n享受这清凉的夏日时光,\\n在海滩上晒着太阳,\\n喝着清凉的饮料,\\n聊天、欢笑、无忧无虑。\\n\\n清晨的日出,\\n天边呈现出美丽的红色,\\n太阳慢慢地升起,\\n照耀着整个天空。\\n\\n在公园里漫步,\\n欣赏着花草树木的美丽,\\n夏日的天空和大地,\\n让人感到无限的快乐。\\n\\n夏日的风吹过,\\n带来了凉爽的感觉,\\n让人感受到生命的美好,\\n让人感受到夏日的热情。\\n\\n这是一个美好的季节,\\n一个充满欢乐和热情的夏日,\\n让我们珍惜这美好的时光,\\n享受这夏日带来的快乐。\"},\"finish_reason\":\"stop\",\"index\":0}]}\n", | |
"```\n", | |
"\n", | |
"### 流式接口\n", | |
"\n", | |
"```bash\n", | |
"curl -vvv https://6d8f-130-211-208-193.ngrok.io/v1/chat/completions \\\n", | |
" -H \"Content-Type: application/json\" \\\n", | |
" -H \"Authorization: Bearer token1\" \\\n", | |
" -d '{ \"stream\": true, \n", | |
" \"model\": \"gpt-3.5-turbo\",\n", | |
" \"messages\": [{\"role\": \"user\", \"content\": \"写一首夏天的诗\"}]\n", | |
" }'\n", | |
"```\n", | |
"\n", | |
"输出:\n", | |
"```json\n", | |
"\n", | |
"data: {\"id\": \"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\", \"object\": \"chat.completion.chunk\", \"created\": 1682004627, \"model\": \"gpt-3.5-turbo-0301\", \"choices\": [{\"delta\": {\"content\": \"难忘的\"}, \"index\": 0, \"finish_reason\": null}]}\n", | |
"\n", | |
"data: {\"id\": \"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\", \"object\": \"chat.completion.chunk\", \"created\": 1682004627, \"model\": \"gpt-3.5-turbo-0301\", \"choices\": [{\"delta\": {\"content\": \"夏日\"}, \"index\": 0, \"finish_reason\": null}]}\n", | |
"\n", | |
"data: {\"id\": \"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\", \"object\": \"chat.completion.chunk\", \"created\": 1682004627, \"model\": \"gpt-3.5-turbo-0301\", \"choices\": [{\"delta\": {\"content\": \"时光\"}, \"index\": 0, \"finish_reason\": null}]}\n", | |
"\n", | |
"data: {\"id\": \"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\", \"object\": \"chat.completion.chunk\", \"created\": 1682004627, \"model\": \"gpt-3.5-turbo-0301\", \"choices\": [{\"delta\": {\"content\": \"。\"}, \"index\": 0, \"finish_reason\": null}]}\n", | |
"\n", | |
"data: {\"id\": \"chatcmpl-77QWpn5cxFi9sVMw56DZReDiGKmcB\", \"object\": \"chat.completion.chunk\", \"created\": 1682004627, \"model\": \"gpt-3.5-turbo-0301\", \"choices\": [{\"delta\": {}, \"index\": 0, \"finish_reason\": \"stop\"}]}\n", | |
"\n", | |
"data: [DONE]\n", | |
"```\n" | |
], | |
"metadata": { | |
"id": "xd6IIdSaIUhr" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"# tips: colab 上 uvicorn的流式输出只有第一次运行notebook的时候才会有效,所以调试的时候可以用 exit()来强制重启 notebook (不删除运行时,从而避免重新下载模型文件)\n", | |
"\n", | |
"exit()" | |
], | |
"metadata": { | |
"id": "NJbR4SKIT4Hc" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment