@h-lunah
Created August 10, 2024 17:58
Untitled3.ipynb
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"gpuType": "V28",
"authorship_tag": "ABX9TyN0qd8BegGgJA+AYkhvqzrI",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "TPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/piotr25691/66bdd3c032f41ed6267d84b3cc06f367/untitled3.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "SzugSLL5CTHQ",
"outputId": "aa718120-6c68-4438-d189-012d9699f3dd"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.42.4)\n",
"Collecting transformers\n",
" Downloading transformers-4.44.0-py3-none-any.whl.metadata (43 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.7/43.7 kB\u001b[0m \u001b[31m782.2 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting flash-attn\n",
" Downloading flash_attn-2.6.3.tar.gz (2.6 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.6/2.6 MB\u001b[0m \u001b[31m13.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h \u001b[1;31merror\u001b[0m: \u001b[1msubprocess-exited-with-error\u001b[0m\n",
" \n",
" \u001b[31m×\u001b[0m \u001b[32mpython setup.py egg_info\u001b[0m did not run successfully.\n",
" \u001b[31m│\u001b[0m exit code: \u001b[1;36m1\u001b[0m\n",
" \u001b[31m╰─>\u001b[0m See above for output.\n",
" \n",
" \u001b[1;35mnote\u001b[0m: This error originates from a subprocess, and is likely not a problem with pip.\n",
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25herror\n",
"\u001b[1;31merror\u001b[0m: \u001b[1mmetadata-generation-failed\u001b[0m\n",
"\n",
"\u001b[31m×\u001b[0m Encountered error while generating package metadata.\n",
"\u001b[31m╰─>\u001b[0m See above for output.\n",
"\n",
"\u001b[1;35mnote\u001b[0m: This is an issue with the package mentioned above, not pip.\n",
"\u001b[1;36mhint\u001b[0m: See above for details.\n"
]
}
],
"source": [
"!pip install -U transformers flash-attn"
]
},
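{
"cell_type": "markdown",
"metadata": {},
"source": [
"The `flash-attn` build above fails during `setup.py egg_info`: the package compiles CUDA extensions and expects `torch`, `packaging`, and `ninja` to already be importable when its build starts, and it will not build at all on the TPU runtime this notebook selects, since it targets CUDA GPUs. A minimal sketch of the usual install order, assuming a CUDA GPU runtime instead:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hedged workaround, assuming a CUDA GPU runtime (not the TPU runtime in\n",
"# this notebook's metadata): install the build prerequisites first, then\n",
"# build flash-attn against the torch that is already installed.\n",
"!pip install -U transformers packaging ninja\n",
"!pip install flash-attn --no-build-isolation"
]
},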
{
"cell_type": "code",
"source": [
"%%bash\n",
"cat > gemma.patch << EOF\n",
"diff --git a/SelfExtend.py b/SelfExtend.py\n",
"index 8f294fa..2aee66d 100644\n",
"--- a/SelfExtend.py\n",
"+++ b/SelfExtend.py\n",
"@@ -116,9 +116,9 @@ def apply(loaded_model, group_size, window_size, enable_flash_attention=False, s\n",
" group_size_1=group_size,\n",
" group_size_2=window_size,\n",
" scale_base=scale_base)\n",
"- # after the default version of attention in 4.36 is LlamaSpdaAttention, but in before 4,36 or in 4.38, it is LlamaAttention\n",
"+ # after the default version of attention in 4.36 is LlamaSdpaAttention, but in before 4,36 or in 4.38, it is LlamaAttention\n",
" # print(\"loaded_model\", loaded_model)\n",
"- modifed_2 = modify_method_of_instance(loaded_model, \"LlamaAttention\", \"forward\", self_extend_attention_forward)\n",
"+ modifed_2 = modify_method_of_instance(loaded_model, \"LlamaSdpaAttention\", \"forward\", self_extend_attention_forward)\n",
" if not modifed_2:\n",
" raise Exception(f\"Failed to modify the attention method of {arch_name}\")\n",
" elif 'Mistral' in arch_name:\n",
"\n",
"EOF"
],
"metadata": {
"id": "-2MJxJbpChHL"
},
"execution_count": null,
"outputs": []
},
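{
"cell_type": "markdown",
"metadata": {},
"source": [
"The patch retargets SelfExtend's method replacement from `LlamaAttention` to `LlamaSdpaAttention`, because transformers >= 4.36 instantiates the SDPA attention class by default, so patching `LlamaAttention` instances no longer finds anything to modify. A minimal sketch of the kind of per-instance patching `modify_method_of_instance` performs (hypothetical helper name, not LongLM's actual implementation):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import types\n",
"\n",
"import torch.nn as nn\n",
"\n",
"\n",
"def patch_instances(model: nn.Module, class_name: str, method_name: str, fn) -> bool:\n",
"    # Rebind `method_name` on every submodule whose class name matches,\n",
"    # so the replacement forward runs on those instances only.\n",
"    hit = False\n",
"    for module in model.modules():\n",
"        if module.__class__.__name__ == class_name:\n",
"            setattr(module, method_name, types.MethodType(fn, module))\n",
"            hit = True\n",
"    return hit"
]
},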
{
"cell_type": "code",
"source": [
"!git clone https://github.com/datamllab/LongLM"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "nUQIfAn6Cpah",
"outputId": "0ee8db74-a113-44fc-cbe6-70dbe36f2c1a"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Cloning into 'LongLM'...\n",
"remote: Enumerating objects: 189, done.\u001b[K\n",
"remote: Counting objects: 100% (87/87), done.\u001b[K\n",
"remote: Compressing objects: 100% (47/47), done.\u001b[K\n",
"remote: Total 189 (delta 58), reused 53 (delta 39), pack-reused 102\u001b[K\n",
"Receiving objects: 100% (189/189), 13.14 MiB | 32.19 MiB/s, done.\n",
"Resolving deltas: 100% (110/110), done.\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"!mv gemma.patch LongLM"
],
"metadata": {
"id": "cpJ5OGirDF20"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"!cd LongLM && patch -Nup1 -i gemma.patch"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "JBk7eSQdDJk3",
"outputId": "d97de97a-b6f3-4f29-950c-044b5ead42de"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"patching file SelfExtend.py\n",
"Hunk #1 succeeded at 116 with fuzz 1.\n"
]
}
]
},
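{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here `-N` skips hunks that look already applied, `-u` reads the input as a unified diff, and `-p1` strips the leading `a/`/`b/` path components. \"fuzz 1\" means one line of surrounding context no longer matched exactly, so `patch` applied the hunk with relaxed matching; the upstream file has drifted slightly since the diff was written."
]
},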
{
"cell_type": "code",
"source": [
"!cd LongLM && sed -i 's/\\.input_ids/\\.input_ids\\.to(\"cuda\")/g' example.py"
],
"metadata": {
"id": "Q90fxxEZE8po"
},
"execution_count": null,
"outputs": []
},
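{
"cell_type": "markdown",
"metadata": {},
"source": [
"The edit above hard-codes `.to(\"cuda\")`, which raises on this TPU runtime (and on CPU-only machines) because no CUDA device exists. A device-agnostic sketch of the same move, with a hypothetical `input_ids` line for illustration:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"\n",
"# Pick whichever accelerator is actually present instead of assuming CUDA.\n",
"device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
"# input_ids = tokenizer(prompt, return_tensors=\"pt\").input_ids.to(device)"
]
},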
{
"cell_type": "code",
"source": [
"!cd LongLM && sed -i \"s:meta-llama/Llama-2-7b-chat-hf:piotr25691/SystemGemma2-2b-it:g\" example.py"
],
"metadata": {
"id": "jxdk0lHEDYtO"
},
"execution_count": null,
"outputs": []
},
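{
"cell_type": "markdown",
"metadata": {},
"source": [
"This swaps example.py's default checkpoint for the Gemma 2 variant. For reference, a hedged sketch of the shape of the resulting script, based on the `apply` signature visible in the patch hunk above (the group/window sizes are illustrative, not tuned values):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hedged sketch, assuming it runs from inside the LongLM checkout so the\n",
"# local SelfExtend module is importable; values are illustrative.\n",
"from transformers import AutoModelForCausalLM, AutoTokenizer\n",
"\n",
"import SelfExtend\n",
"\n",
"model_name = \"piotr25691/SystemGemma2-2b-it\"\n",
"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
"model = AutoModelForCausalLM.from_pretrained(model_name)\n",
"\n",
"# Self-Extend groups distant positions so the pretrained context stretches\n",
"# without fine-tuning; flash attention stays off since it failed to install.\n",
"SelfExtend.apply(model, group_size=8, window_size=1024, enable_flash_attention=False)"
]
},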
{
"cell_type": "code",
"source": [
"!cd LongLM && python3 example.py"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "hwDLMXblDg_e",
"outputId": "9957617e-eba7-446b-980d-f5cac54f457f"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Traceback (most recent call last):\n",
" File \"/content/LongLM/example.py\", line 12, in <module>\n",
" import SelfExtend \n",
" File \"/content/LongLM/SelfExtend.py\", line 3, in <module>\n",
" import self_extend_patch as SE\n",
" File \"/content/LongLM/self_extend_patch/__init__.py\", line 1, in <module>\n",
" from . import Llama\n",
" File \"/content/LongLM/self_extend_patch/Llama.py\", line 10, in <module>\n",
" from flash_attn import flash_attn_func, flash_attn_varlen_func\n",
"ModuleNotFoundError: No module named 'flash_attn'\n"
]
}
]
}
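,
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The run fails because `self_extend_patch/Llama.py` imports `flash_attn` unconditionally and the install at the top of the notebook never succeeded. A hedged workaround, assuming the non-flash code paths are enough here: guard that import so the module still loads when flash-attn is absent."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%bash\n",
"# Hedged workaround: wrap the unconditional flash_attn import in a\n",
"# try/except so SelfExtend's non-flash paths still load without it.\n",
"# Assumes the import sits on the single line shown in the traceback.\n",
"cd LongLM\n",
"python3 - << 'PY'\n",
"path = 'self_extend_patch/Llama.py'\n",
"src = open(path).read()\n",
"needle = 'from flash_attn import flash_attn_func, flash_attn_varlen_func'\n",
"guarded = ('try:\\n    ' + needle + '\\nexcept ImportError:\\n'\n",
"           '    flash_attn_func = flash_attn_varlen_func = None\\n')\n",
"open(path, 'w').write(src.replace(needle, guarded))\n",
"PY"
]
}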
]
}