Untitled3.ipynb
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "gpuType": "V28",
      "authorship_tag": "ABX9TyN0qd8BegGgJA+AYkhvqzrI",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "TPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/piotr25691/66bdd3c032f41ed6267d84b3cc06f367/untitled3.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "SzugSLL5CTHQ",
        "outputId": "aa718120-6c68-4438-d189-012d9699f3dd"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.42.4)\n",
            "Collecting transformers\n",
            "  Downloading transformers-4.44.0-py3-none-any.whl.metadata (43 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.7/43.7 kB\u001b[0m \u001b[31m782.2 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting flash-attn\n",
            "  Downloading flash_attn-2.6.3.tar.gz (2.6 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.6/2.6 MB\u001b[0m \u001b[31m13.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  \u001b[1;31merror\u001b[0m: \u001b[1msubprocess-exited-with-error\u001b[0m\n",
            "  \n",
            "  \u001b[31m×\u001b[0m \u001b[32mpython setup.py egg_info\u001b[0m did not run successfully.\n",
            "  \u001b[31m│\u001b[0m exit code: \u001b[1;36m1\u001b[0m\n",
            "  \u001b[31m╰─>\u001b[0m See above for output.\n",
            "  \n",
            "  \u001b[1;35mnote\u001b[0m: This error originates from a subprocess, and is likely not a problem with pip.\n",
            "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25herror\n",
            "\u001b[1;31merror\u001b[0m: \u001b[1mmetadata-generation-failed\u001b[0m\n",
            "\n",
            "\u001b[31m×\u001b[0m Encountered error while generating package metadata.\n",
            "\u001b[31m╰─>\u001b[0m See above for output.\n",
            "\n",
            "\u001b[1;35mnote\u001b[0m: This is an issue with the package mentioned above, not pip.\n",
            "\u001b[1;36mhint\u001b[0m: See above for details.\n"
          ]
        }
      ],
      "source": [
        "!pip install -U transformers flash-attn"
      ]
    },
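    {
      "cell_type": "markdown",
      "source": [
        "The `flash-attn` build fails above because its `setup.py` needs `torch` plus a CUDA toolchain at build time, and this notebook runs on a TPU runtime where neither is available. A minimal sketch of the documented install flow, assuming a CUDA GPU runtime instead; on TPU the package cannot be built and has to be skipped:"
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "# Sketch, assuming a CUDA GPU runtime; flash-attn does not build on TPU.\n",
        "# flash-attn compiles against the torch already in the environment, so\n",
        "# install its build prerequisites first and disable build isolation,\n",
        "# per flash-attn's documented install flow.\n",
        "!pip install -U torch ninja packaging\n",
        "!pip install flash-attn --no-build-isolation"
      ],
      "metadata": {},
      "execution_count": null,
      "outputs": []
    },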
    {
      "cell_type": "code",
      "source": [
        "%%bash\n",
        "cat > gemma.patch << EOF\n",
        "diff --git a/SelfExtend.py b/SelfExtend.py\n",
        "index 8f294fa..2aee66d 100644\n",
        "--- a/SelfExtend.py\n",
        "+++ b/SelfExtend.py\n",
        "@@ -116,9 +116,9 @@ def apply(loaded_model, group_size, window_size, enable_flash_attention=False, s\n",
        " group_size_1=group_size,\n",
        " group_size_2=window_size,\n",
        " scale_base=scale_base)\n",
        "- # after the default version of attention in 4.36 is LlamaSpdaAttention, but in before 4,36 or in 4.38, it is LlamaAttention\n",
        "+ # after the default version of attention in 4.36 is LlamaSdpaAttention, but in before 4,36 or in 4.38, it is LlamaAttention\n",
        " # print(\"loaded_model\", loaded_model)\n",
        "- modifed_2 = modify_method_of_instance(loaded_model, \"LlamaAttention\", \"forward\", self_extend_attention_forward)\n",
        "+ modifed_2 = modify_method_of_instance(loaded_model, \"LlamaSdpaAttention\", \"forward\", self_extend_attention_forward)\n",
        " if not modifed_2:\n",
        " raise Exception(f\"Failed to modify the attention method of {arch_name}\")\n",
        " elif 'Mistral' in arch_name:\n",
        "\n",
        "EOF"
      ],
      "metadata": {
        "id": "-2MJxJbpChHL"
      },
      "execution_count": null,
      "outputs": []
    },
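    {
      "cell_type": "markdown",
      "source": [
        "The patch fixes a typo in a comment and retargets SelfExtend's monkey-patching from `LlamaAttention` to `LlamaSdpaAttention`, the default attention class in recent `transformers` releases. A minimal sketch of how the patched `apply` would be called, based only on the signature visible in the hunk header; the `group_size`/`window_size` values are illustrative, not tuned, and note that importing `SelfExtend` itself requires `flash_attn` here, which is exactly what fails further down:"
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "# Sketch: calling SelfExtend.apply after the patch. The argument names come\n",
        "# from the hunk header above (apply(loaded_model, group_size, window_size,\n",
        "# enable_flash_attention=False, ...)); the values are illustrative.\n",
        "import sys\n",
        "sys.path.append(\"/content/LongLM\")\n",
        "\n",
        "from transformers import AutoModelForCausalLM\n",
        "import SelfExtend  # needs flash_attn importable, see the failure below\n",
        "\n",
        "model = AutoModelForCausalLM.from_pretrained(\"piotr25691/SystemGemma2-2b-it\")\n",
        "SelfExtend.apply(model, group_size=8, window_size=1024, enable_flash_attention=False)"
      ],
      "metadata": {},
      "execution_count": null,
      "outputs": []
    },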
    {
      "cell_type": "code",
      "source": [
"!git clone https://github.com/datamllab/LongLM" | |
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "nUQIfAn6Cpah",
        "outputId": "0ee8db74-a113-44fc-cbe6-70dbe36f2c1a"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Cloning into 'LongLM'...\n",
            "remote: Enumerating objects: 189, done.\u001b[K\n",
            "remote: Counting objects: 100% (87/87), done.\u001b[K\n",
            "remote: Compressing objects: 100% (47/47), done.\u001b[K\n",
            "remote: Total 189 (delta 58), reused 53 (delta 39), pack-reused 102\u001b[K\n",
            "Receiving objects: 100% (189/189), 13.14 MiB | 32.19 MiB/s, done.\n",
            "Resolving deltas: 100% (110/110), done.\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!mv gemma.patch LongLM"
      ],
      "metadata": {
        "id": "cpJ5OGirDF20"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "!cd LongLM && patch -Nup1 -i gemma.patch"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "JBk7eSQdDJk3",
        "outputId": "d97de97a-b6f3-4f29-950c-044b5ead42de"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "patching file SelfExtend.py\n",
            "Hunk #1 succeeded at 116 with fuzz 1.\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!cd LongLM && sed -i 's/\\.input_ids/\\.input_ids\\.to(\"cuda\")/g' example.py"
      ],
      "metadata": {
        "id": "Q90fxxEZE8po"
      },
      "execution_count": null,
      "outputs": []
    },
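    {
      "cell_type": "markdown",
      "source": [
        "Note that this `sed` hard-codes a `\"cuda\"` device while the notebook metadata selects a TPU runtime, where no CUDA device exists. A device-agnostic sketch of the same tensor placement (the variable names are illustrative, not taken from example.py):"
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "# Sketch: pick whatever accelerator is actually present instead of\n",
        "# hard-coding \"cuda\". Names here are illustrative.\n",
        "import torch\n",
        "\n",
        "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
        "# tokens = tokenizer(prompt, return_tensors=\"pt\").input_ids.to(device)"
      ],
      "metadata": {},
      "execution_count": null,
      "outputs": []
    },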
    {
      "cell_type": "code",
      "source": [
        "!cd LongLM && sed -i \"s:meta-llama/Llama-2-7b-chat-hf:piotr25691/SystemGemma2-2b-it:g\" example.py"
      ],
      "metadata": {
        "id": "jxdk0lHEDYtO"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "!cd LongLM && python3 example.py"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "hwDLMXblDg_e",
        "outputId": "9957617e-eba7-446b-980d-f5cac54f457f"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Traceback (most recent call last):\n",
            "  File \"/content/LongLM/example.py\", line 12, in <module>\n",
            "    import SelfExtend \n",
            "  File \"/content/LongLM/SelfExtend.py\", line 3, in <module>\n",
            "    import self_extend_patch as SE\n",
            "  File \"/content/LongLM/self_extend_patch/__init__.py\", line 1, in <module>\n",
            "    from . import Llama\n",
            "  File \"/content/LongLM/self_extend_patch/Llama.py\", line 10, in <module>\n",
            "    from flash_attn import flash_attn_func, flash_attn_varlen_func\n",
            "ModuleNotFoundError: No module named 'flash_attn'\n"
          ]
        }
      ]
    }
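    ,
    {
      "cell_type": "markdown",
      "source": [
        "The run fails because `self_extend_patch/Llama.py` imports `flash_attn` unconditionally at module load, and the `flash-attn` install never succeeded on this TPU runtime. One possible workaround, sketched below under the assumption that the non-flash (SDPA) code path never calls the flash functions: guard the import so the package loads without `flash-attn`, then re-run `example.py`. The guard is an edit of the cloned file, not part of LongLM itself:"
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "%%bash\n",
        "# Sketch: make the flash_attn import optional in the cloned repo so the\n",
        "# SDPA path can load without flash-attn installed (assumption: the flash\n",
        "# functions are only reached when enable_flash_attention=True).\n",
        "cd LongLM\n",
        "python3 - << 'PY'\n",
        "path = \"self_extend_patch/Llama.py\"\n",
        "src = open(path).read()\n",
        "old = \"from flash_attn import flash_attn_func, flash_attn_varlen_func\"\n",
        "new = (\"try:\\n\"\n",
        "       \"    from flash_attn import flash_attn_func, flash_attn_varlen_func\\n\"\n",
        "       \"except ImportError:  # flash-attn is optional here\\n\"\n",
        "       \"    flash_attn_func = flash_attn_varlen_func = None\")\n",
        "open(path, \"w\").write(src.replace(old, new))\n",
        "PY"
      ],
      "metadata": {},
      "execution_count": null,
      "outputs": []
    }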
  ]
} |