@h-lunah
Created August 10, 2024 17:58
Untitled3.ipynb
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"gpuType": "V28",
"authorship_tag": "ABX9TyN0qd8BegGgJA+AYkhvqzrI",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "TPU"
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/piotr25691/66bdd3c032f41ed6267d84b3cc06f367/untitled3.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "SzugSLL5CTHQ",
"outputId": "aa718120-6c68-4438-d189-012d9699f3dd"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.42.4)\n",
"Collecting transformers\n",
" Downloading transformers-4.44.0-py3-none-any.whl.metadata (43 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.7/43.7 kB\u001b[0m \u001b[31m782.2 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting flash-attn\n",
" Downloading flash_attn-2.6.3.tar.gz (2.6 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.6/2.6 MB\u001b[0m \u001b[31m13.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h \u001b[1;31merror\u001b[0m: \u001b[1msubprocess-exited-with-error\u001b[0m\n",
" \n",
" \u001b[31m×\u001b[0m \u001b[32mpython setup.py egg_info\u001b[0m did not run successfully.\n",
" \u001b[31m│\u001b[0m exit code: \u001b[1;36m1\u001b[0m\n",
" \u001b[31m╰─>\u001b[0m See above for output.\n",
" \n",
" \u001b[1;35mnote\u001b[0m: This error originates from a subprocess, and is likely not a problem with pip.\n",
" Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25herror\n",
"\u001b[1;31merror\u001b[0m: \u001b[1mmetadata-generation-failed\u001b[0m\n",
"\n",
"\u001b[31m×\u001b[0m Encountered error while generating package metadata.\n",
"\u001b[31m╰─>\u001b[0m See above for output.\n",
"\n",
"\u001b[1;35mnote\u001b[0m: This is an issue with the package mentioned above, not pip.\n",
"\u001b[1;36mhint\u001b[0m: See above for details.\n"
]
}
],
"source": [
"!pip install -U transformers flash-attn"
]
},
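{
"cell_type": "markdown",
"metadata": {},
"source": [
"The `flash-attn` build above fails during `setup.py egg_info`: the package compiles CUDA extensions and expects `torch`, `packaging`, and `ninja` to already be importable when its build starts, and it will not build at all on the TPU runtime this notebook selects, since it targets CUDA GPUs. A minimal sketch of the usual install order, assuming a CUDA GPU runtime instead:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hedged workaround, assuming a CUDA GPU runtime (not the TPU runtime in\n",
"# this notebook's metadata): install the build prerequisites first, then\n",
"# build flash-attn against the torch that is already installed.\n",
"!pip install -U transformers packaging ninja\n",
"!pip install flash-attn --no-build-isolation"
]
},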
{
"cell_type": "code",
"source": [
"%%bash\n",
"cat > gemma.patch << EOF\n",
"diff --git a/SelfExtend.py b/SelfExtend.py\n",
"index 8f294fa..2aee66d 100644\n",
"--- a/SelfExtend.py\n",
"+++ b/SelfExtend.py\n",
"@@ -116,9 +116,9 @@ def apply(loaded_model, group_size, window_size, enable_flash_attention=False, s\n",
" group_size_1=group_size,\n",
" group_size_2=window_size,\n",
" scale_base=scale_base)\n",
"- # after the default version of attention in 4.36 is LlamaSpdaAttention, but in before 4,36 or in 4.38, it is LlamaAttention\n",
"+ # after the default version of attention in 4.36 is LlamaSdpaAttention, but in before 4,36 or in 4.38, it is LlamaAttention\n",
" # print(\"loaded_model\", loaded_model)\n",
"- modifed_2 = modify_method_of_instance(loaded_model, \"LlamaAttention\", \"forward\", self_extend_attention_forward)\n",
"+ modifed_2 = modify_method_of_instance(loaded_model, \"LlamaSdpaAttention\", \"forward\", self_extend_attention_forward)\n",
" if not modifed_2:\n",
" raise Exception(f\"Failed to modify the attention method of {arch_name}\")\n",
" elif 'Mistral' in arch_name:\n",
"\n",
"EOF"
],
"metadata": {
"id": "-2MJxJbpChHL"
},
"execution_count": null,
"outputs": []
},
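{
"cell_type": "markdown",
"metadata": {},
"source": [
"The patch retargets SelfExtend's method replacement from `LlamaAttention` to `LlamaSdpaAttention`, because transformers >= 4.36 instantiates the SDPA attention class by default, so patching `LlamaAttention` instances no longer finds anything to modify. A minimal sketch of the kind of per-instance patching `modify_method_of_instance` performs (hypothetical helper name, not LongLM's actual implementation):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import types\n",
"\n",
"import torch.nn as nn\n",
"\n",
"\n",
"def patch_instances(model: nn.Module, class_name: str, method_name: str, fn) -> bool:\n",
"    # Rebind `method_name` on every submodule whose class name matches,\n",
"    # so the replacement forward runs on those instances only.\n",
"    hit = False\n",
"    for module in model.modules():\n",
"        if module.__class__.__name__ == class_name:\n",
"            setattr(module, method_name, types.MethodType(fn, module))\n",
"            hit = True\n",
"    return hit"
]
},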
{
"cell_type": "code",
"source": [
"!git clone https://github.com/datamllab/LongLM"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "nUQIfAn6Cpah",
"outputId": "0ee8db74-a113-44fc-cbe6-70dbe36f2c1a"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Cloning into 'LongLM'...\n",
"remote: Enumerating objects: 189, done.\u001b[K\n",
"remote: Counting objects: 100% (87/87), done.\u001b[K\n",
"remote: Compressing objects: 100% (47/47), done.\u001b[K\n",
"remote: Total 189 (delta 58), reused 53 (delta 39), pack-reused 102\u001b[K\n",
"Receiving objects: 100% (189/189), 13.14 MiB | 32.19 MiB/s, done.\n",
"Resolving deltas: 100% (110/110), done.\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"!mv gemma.patch LongLM"
],
"metadata": {
"id": "cpJ5OGirDF20"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"!cd LongLM && patch -Nup1 -i gemma.patch"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "JBk7eSQdDJk3",
"outputId": "d97de97a-b6f3-4f29-950c-044b5ead42de"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"patching file SelfExtend.py\n",
"Hunk #1 succeeded at 116 with fuzz 1.\n"
]
}
]
},
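{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here `-N` skips hunks that look already applied, `-u` reads the input as a unified diff, and `-p1` strips the leading `a/`/`b/` path components. \"fuzz 1\" means one line of surrounding context no longer matched exactly, so `patch` applied the hunk with relaxed matching; the upstream file has drifted slightly since the diff was written."
]
},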
{
"cell_type": "code",
"source": [
"!cd LongLM && sed -i 's/\\.input_ids/\\.input_ids\\.to(\"cuda\")/g' example.py"
],
"metadata": {
"id": "Q90fxxEZE8po"
},
"execution_count": null,
"outputs": []
},
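{
"cell_type": "markdown",
"metadata": {},
"source": [
"The edit above hard-codes `.to(\"cuda\")`, which raises on this TPU runtime (and on CPU-only machines) because no CUDA device exists. A device-agnostic sketch of the same move, with a hypothetical `input_ids` line for illustration:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"\n",
"# Pick whichever accelerator is actually present instead of assuming CUDA.\n",
"device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
"# input_ids = tokenizer(prompt, return_tensors=\"pt\").input_ids.to(device)"
]
},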
{
"cell_type": "code",
"source": [
"!cd LongLM && sed -i \"s:meta-llama/Llama-2-7b-chat-hf:piotr25691/SystemGemma2-2b-it:g\" example.py"
],
"metadata": {
"id": "jxdk0lHEDYtO"
},
"execution_count": null,
"outputs": []
},
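{
"cell_type": "markdown",
"metadata": {},
"source": [
"This swaps example.py's default checkpoint for the Gemma 2 variant. For reference, a hedged sketch of the shape of the resulting script, based on the `apply` signature visible in the patch hunk above (the group/window sizes are illustrative, not tuned values):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hedged sketch, assuming it runs from inside the LongLM checkout so the\n",
"# local SelfExtend module is importable; values are illustrative.\n",
"from transformers import AutoModelForCausalLM, AutoTokenizer\n",
"\n",
"import SelfExtend\n",
"\n",
"model_name = \"piotr25691/SystemGemma2-2b-it\"\n",
"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
"model = AutoModelForCausalLM.from_pretrained(model_name)\n",
"\n",
"# Self-Extend groups distant positions so the pretrained context stretches\n",
"# without fine-tuning; flash attention stays off since it failed to install.\n",
"SelfExtend.apply(model, group_size=8, window_size=1024, enable_flash_attention=False)"
]
},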
{
"cell_type": "code",
"source": [
"!cd LongLM && python3 example.py"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "hwDLMXblDg_e",
"outputId": "9957617e-eba7-446b-980d-f5cac54f457f"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Traceback (most recent call last):\n",
" File \"/content/LongLM/example.py\", line 12, in <module>\n",
" import SelfExtend \n",
" File \"/content/LongLM/SelfExtend.py\", line 3, in <module>\n",
" import self_extend_patch as SE\n",
" File \"/content/LongLM/self_extend_patch/__init__.py\", line 1, in <module>\n",
" from . import Llama\n",
" File \"/content/LongLM/self_extend_patch/Llama.py\", line 10, in <module>\n",
" from flash_attn import flash_attn_func, flash_attn_varlen_func\n",
"ModuleNotFoundError: No module named 'flash_attn'\n"
]
}
]
}
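,
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The run fails because `self_extend_patch/Llama.py` imports `flash_attn` unconditionally and the install at the top of the notebook never succeeded. A hedged workaround, assuming the non-flash code paths are enough here: guard that import so the module still loads when flash-attn is absent."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%bash\n",
"# Hedged workaround: wrap the unconditional flash_attn import in a\n",
"# try/except so SelfExtend's non-flash paths still load without it.\n",
"# Assumes the import sits on the single line shown in the traceback.\n",
"cd LongLM\n",
"python3 - << 'PY'\n",
"path = 'self_extend_patch/Llama.py'\n",
"src = open(path).read()\n",
"needle = 'from flash_attn import flash_attn_func, flash_attn_varlen_func'\n",
"guarded = ('try:\\n    ' + needle + '\\nexcept ImportError:\\n'\n",
"           '    flash_attn_func = flash_attn_varlen_func = None\\n')\n",
"open(path, 'w').write(src.replace(needle, guarded))\n",
"PY"
]
}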
]
}