Skip to content

Instantly share code, notes, and snippets.

@jrobles98
Created August 15, 2024 19:57
Show Gist options
  • Save jrobles98/788b6d9734bcbe76319e1ebdbae045e3 to your computer and use it in GitHub Desktop.
Save jrobles98/788b6d9734bcbe76319e1ebdbae045e3 to your computer and use it in GitHub Desktop.
{
"cells": [
{
"cell_type": "markdown",
"id": "5ba52b5a-e51c-43e2-b3c6-b6bbabd5d720",
"metadata": {},
"source": [
"# INIT"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d1696c4b-327b-4523-9d04-8415291281b0",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# `%pip` installs into the environment of the running kernel.\n",
"# transformers is pinned because the cells below touch version-specific internals\n",
"# (`self_attn.layer_idx`).\n",
"%pip install transformers==4.44.0 torch"
]
},
{
"cell_type": "code",
"execution_count": 258,
"id": "1e4ee1ef-3cdf-4c91-a43b-b40a910e7279",
"metadata": {},
"outputs": [],
"source": [
"# Load model directly\n",
"import torch\n",
"from transformers import AutoTokenizer, AutoModelForCausalLM, Qwen2ForCausalLM, Qwen2Model, Qwen2Config\n",
"import timeit"
]
},
{
"cell_type": "code",
"execution_count": 260,
"id": "f3df2370-fa5f-4e63-922e-8bc7402a4859",
"metadata": {},
"outputs": [],
"source": [
"# One tokenizer plus two separately loaded instances of the same checkpoint:\n",
"# `model` is the reference, `new_model` is the one that gets modified later.\n",
"tokenizer = AutoTokenizer.from_pretrained(\"Qwen/Qwen2-0.5B\")\n",
"model, new_model = (\n",
"    AutoModelForCausalLM.from_pretrained(\"Qwen/Qwen2-0.5B\") for _ in range(2)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 261,
"id": "7705bd5f-5a8c-4139-88d1-1e82d636d832",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model:\n",
"Qwen2ForCausalLM(\n",
" (model): Qwen2Model(\n",
" (embed_tokens): Embedding(151936, 896)\n",
" (layers): ModuleList(\n",
" (0-23): 24 x Qwen2DecoderLayer(\n",
" (self_attn): Qwen2SdpaAttention(\n",
" (q_proj): Linear(in_features=896, out_features=896, bias=True)\n",
" (k_proj): Linear(in_features=896, out_features=128, bias=True)\n",
" (v_proj): Linear(in_features=896, out_features=128, bias=True)\n",
" (o_proj): Linear(in_features=896, out_features=896, bias=False)\n",
" (rotary_emb): Qwen2RotaryEmbedding()\n",
" )\n",
" (mlp): Qwen2MLP(\n",
" (gate_proj): Linear(in_features=896, out_features=4864, bias=False)\n",
" (up_proj): Linear(in_features=896, out_features=4864, bias=False)\n",
" (down_proj): Linear(in_features=4864, out_features=896, bias=False)\n",
" (act_fn): SiLU()\n",
" )\n",
" (input_layernorm): Qwen2RMSNorm((896,), eps=1e-06)\n",
" (post_attention_layernorm): Qwen2RMSNorm((896,), eps=1e-06)\n",
" )\n",
" )\n",
" (norm): Qwen2RMSNorm((896,), eps=1e-06)\n",
" )\n",
" (lm_head): Linear(in_features=896, out_features=151936, bias=False)\n",
")\n",
"\n",
"\n",
"Config:\n",
"Qwen2Config {\n",
" \"_name_or_path\": \"Qwen/Qwen2-0.5B\",\n",
" \"architectures\": [\n",
" \"Qwen2ForCausalLM\"\n",
" ],\n",
" \"attention_dropout\": 0.0,\n",
" \"bos_token_id\": 151643,\n",
" \"eos_token_id\": 151643,\n",
" \"hidden_act\": \"silu\",\n",
" \"hidden_size\": 896,\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 4864,\n",
" \"max_position_embeddings\": 131072,\n",
" \"max_window_layers\": 24,\n",
" \"model_type\": \"qwen2\",\n",
" \"num_attention_heads\": 14,\n",
" \"num_hidden_layers\": 24,\n",
" \"num_key_value_heads\": 2,\n",
" \"rms_norm_eps\": 1e-06,\n",
" \"rope_theta\": 1000000.0,\n",
" \"sliding_window\": null,\n",
" \"tie_word_embeddings\": true,\n",
" \"torch_dtype\": \"bfloat16\",\n",
" \"transformers_version\": \"4.44.0\",\n",
" \"use_cache\": true,\n",
" \"use_sliding_window\": false,\n",
" \"vocab_size\": 151936\n",
"}\n",
"\n"
]
}
],
"source": [
"# Show the architecture and the configuration of the reference model.\n",
"print(f\"Model:\\n{model}\")\n",
"print(f\"\\n\\nConfig:\\n{model.config}\")"
]
},
{
"cell_type": "markdown",
"id": "47f47a98-e8e1-4e29-87be-5336e8764841",
"metadata": {},
"source": [
"# Config"
]
},
{
"cell_type": "code",
"execution_count": 262,
"id": "b82fae4e-23ce-4270-8ea0-ddbf0ab4460e",
"metadata": {},
"outputs": [],
"source": [
"# Total number of decoder layers to keep (the last original layer is always one of them)\n",
"n_total_layers = 20\n",
"\n",
"# Guard the slicing done further down: 0 would keep every layer (`[:-1]` plus the last),\n",
"# and a value above the model depth would append the last layer a second time.\n",
"assert 1 <= n_total_layers <= model.config.num_hidden_layers, (\n",
"    f\"n_total_layers must be in [1, {model.config.num_hidden_layers}], got {n_total_layers}\"\n",
")"
]
},
{
"cell_type": "markdown",
"id": "7938babc-f2a2-4529-b247-77a372ffb27f",
"metadata": {},
"source": [
"> Note: the following cell does not change the layers of the already-built model, because the configuration is only used when the model and its layers are instantiated."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c2fe3b3f-fce3-4fae-9315-e2e52f54f72b",
"metadata": {},
"outputs": [],
"source": [
"# Build two independent config objects from the loaded model's settings.\n",
"# `from_dict` maps every key onto its own field; passing the dict positionally\n",
"# (`Qwen2Config(d)`) would bind the whole dict to `vocab_size` and leave every\n",
"# other field at the class defaults.\n",
"old_config = Qwen2Config.from_dict(model.config.to_dict())\n",
"new_config = Qwen2Config.from_dict(model.config.to_dict())\n",
"new_config.update(\n",
"    {\n",
"        \"num_hidden_layers\": n_total_layers,\n",
"        \"max_window_layers\": n_total_layers,\n",
"    }\n",
")\n",
"\n",
"# apply config on new model\n",
"new_model.config = new_config\n",
"\n",
"# reporting with a header; rows whose value differs are marked with '--->'\n",
"print(\"\\nParam:\" + \" \"*29 + \"\\t\" + \"MODEL\" + \" \"*3 + \"\\t\\t\" + \"NEW_MODEL\\n\\n\")\n",
"old_values = old_config.to_dict()\n",
"new_values = new_config.to_dict()\n",
"for param, old in old_values.items():\n",
"    # Look the key up by name instead of relying on both dicts sharing one order.\n",
"    new = new_values.get(param)\n",
"    marker = \"--->\" if old != new else \"\\t\"\n",
"    print(param.ljust(35) + \"\\t\" + str(old)[:12].ljust(15) + marker + \"\\t\" + str(new)[:14])"
]
},
{
"cell_type": "markdown",
"id": "c77a8f2b-ebeb-440d-abb2-1c785ab543ea",
"metadata": {
"jp-MarkdownHeadingCollapsed": true
},
"source": [
"# Simple test"
]
},
{
"cell_type": "code",
"execution_count": 207,
"id": "6094fade-e963-4a6c-b6e2-8f3ab38ce411",
"metadata": {},
"outputs": [],
"source": [
"# Keep the attention mask in the encoding: it is forwarded to `generate` through\n",
"# `**inputs`, which otherwise warns that the mask was not set.\n",
"inputs = tokenizer('''\n",
"<|im_start|>user Hola<|im_end|>\n",
"<|im_start|>assistant ''', return_tensors=\"pt\")"
]
},
{
"cell_type": "code",
"execution_count": 208,
"id": "4a3b5522-831b-491b-bf1f-5a157a55e10c",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
"Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"<|im_start|>user Hola<|im_end|>\n",
"<|im_start|>assistant 1. The first sentence of the first paragraph is a title, so it is not a proper title. The second sentence is a subtitle, so it is not a proper subtitle. The third sentence is a paragraph, so it is a proper paragraph.\n",
"9.49 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)\n"
]
}
],
"source": [
"%%timeit -r1 -n1 \n",
"# Generate up to 50 new tokens with the reference model and show the decoded text.\n",
"generated = model.generate(**inputs, max_new_tokens=50)\n",
"print(tokenizer.batch_decode(generated)[0])"
]
},
{
"cell_type": "markdown",
"id": "981396e4-f91c-43fe-b7bf-8ee14b9f184a",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"source": [
"# Layer decomposition/recomposition"
]
},
{
"cell_type": "code",
"execution_count": 264,
"id": "508c36ec-016b-4905-801c-4c9026bf2aed",
"metadata": {},
"outputs": [],
"source": [
"# Take the layers from `new_model` (a separately loaded copy), not from `model`:\n",
"# the cells below renumber `layer_idx` on these modules in place, which would\n",
"# otherwise also renumber the layers of the reference `model`.\n",
"old_layers = new_model.model.layers"
]
},
{
"cell_type": "code",
"execution_count": 265,
"id": "265fdc05-6164-492c-9669-d240d5598a29",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"ModuleList(\n",
" (0-23): 24 x Qwen2DecoderLayer(\n",
" (self_attn): Qwen2SdpaAttention(\n",
" (q_proj): Linear(in_features=896, out_features=896, bias=True)\n",
" (k_proj): Linear(in_features=896, out_features=128, bias=True)\n",
" (v_proj): Linear(in_features=896, out_features=128, bias=True)\n",
" (o_proj): Linear(in_features=896, out_features=896, bias=False)\n",
" (rotary_emb): Qwen2RotaryEmbedding()\n",
" )\n",
" (mlp): Qwen2MLP(\n",
" (gate_proj): Linear(in_features=896, out_features=4864, bias=False)\n",
" (up_proj): Linear(in_features=896, out_features=4864, bias=False)\n",
" (down_proj): Linear(in_features=4864, out_features=896, bias=False)\n",
" (act_fn): SiLU()\n",
" )\n",
" (input_layernorm): Qwen2RMSNorm((896,), eps=1e-06)\n",
" (post_attention_layernorm): Qwen2RMSNorm((896,), eps=1e-06)\n",
" )\n",
")"
]
},
"execution_count": 265,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"old_layers"
]
},
{
"cell_type": "markdown",
"id": "76c30af4-d102-47da-b058-8249c7bed767",
"metadata": {},
"source": [
"### Create a `ModuleList`\n",
"> ###### We create a `ModuleList` with the first `n_total_layers - 1` layers plus the last one"
]
},
{
"cell_type": "code",
"execution_count": 266,
"id": "000d8119-06af-4493-9334-4a12c3ef6546",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '23']\n"
]
}
],
"source": [
"\n",
"new_layers = torch.nn.ModuleList(\n",
" old_layers[:n_total_layers-1] + \n",
" [\n",
" old_layers[-1]\n",
" ]\n",
")\n",
"print([str(l.self_attn.layer_idx)for l in new_layers])"
]
},
{
"cell_type": "code",
"execution_count": 216,
"id": "fdd95432-3de9-42bd-b046-2adbf35f1b01",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '23']\n"
]
}
],
"source": [
"# TEST: to not use the final layer, only the first ones found\n",
"new_layers = torch.nn.ModuleList(\n",
" old_layers[:n_total_layers]\n",
")\n",
"print([str(l.self_attn.layer_idx)for l in new_layers])"
]
},
{
"cell_type": "markdown",
"id": "4251e8dd-de8c-445a-a7cb-43d115548903",
"metadata": {},
"source": [
"### Reassign the layer ids and also their config"
]
},
{
"cell_type": "code",
"execution_count": 267,
"id": "06e88779-7905-4264-ab2c-a3eb0296b021",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19']\n"
]
}
],
"source": [
"for i, layer in enumerate(new_layers):\n",
" layer.layer_idx = layer.self_attn.layer_idx = i\n",
" \n",
"print([str(l.self_attn.layer_idx)for l in new_layers])"
]
},
{
"cell_type": "markdown",
"id": "4380a691-08cf-47bd-bce9-da18b0bbf68f",
"metadata": {},
"source": [
"# Model Interpolation"
]
},
{
"cell_type": "code",
"execution_count": 268,
"id": "f614cc02-c326-4663-8fb4-a72a87c192af",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Qwen2Config {\n",
" \"attention_dropout\": 0.0,\n",
" \"hidden_act\": \"silu\",\n",
" \"hidden_size\": 4096,\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 22016,\n",
" \"max_position_embeddings\": 32768,\n",
" \"max_window_layers\": 20,\n",
" \"model_type\": \"qwen2\",\n",
" \"num_attention_heads\": 32,\n",
" \"num_hidden_layers\": 20,\n",
" \"num_key_value_heads\": 32,\n",
" \"rms_norm_eps\": 1e-06,\n",
" \"rope_theta\": 10000.0,\n",
" \"sliding_window\": null,\n",
" \"tie_word_embeddings\": false,\n",
" \"transformers_version\": \"4.44.0\",\n",
" \"use_cache\": true,\n",
" \"use_sliding_window\": false,\n",
" \"vocab_size\": {\n",
" \"_name_or_path\": \"Qwen/Qwen2-0.5B\",\n",
" \"add_cross_attention\": false,\n",
" \"architectures\": [\n",
" \"Qwen2ForCausalLM\"\n",
" ],\n",
" \"attention_dropout\": 0.0,\n",
" \"bad_words_ids\": null,\n",
" \"begin_suppress_tokens\": null,\n",
" \"bos_token_id\": 151643,\n",
" \"chunk_size_feed_forward\": 0,\n",
" \"cross_attention_hidden_size\": null,\n",
" \"decoder_start_token_id\": null,\n",
" \"diversity_penalty\": 0.0,\n",
" \"do_sample\": false,\n",
" \"early_stopping\": false,\n",
" \"encoder_no_repeat_ngram_size\": 0,\n",
" \"eos_token_id\": 151643,\n",
" \"exponential_decay_length_penalty\": null,\n",
" \"finetuning_task\": null,\n",
" \"forced_bos_token_id\": null,\n",
" \"forced_eos_token_id\": null,\n",
" \"hidden_act\": \"silu\",\n",
" \"hidden_size\": 896,\n",
" \"id2label\": {\n",
" \"0\": \"LABEL_0\",\n",
" \"1\": \"LABEL_1\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 4864,\n",
" \"is_decoder\": false,\n",
" \"is_encoder_decoder\": false,\n",
" \"label2id\": {\n",
" \"LABEL_0\": 0,\n",
" \"LABEL_1\": 1\n",
" },\n",
" \"length_penalty\": 1.0,\n",
" \"max_length\": 20,\n",
" \"max_position_embeddings\": 131072,\n",
" \"max_window_layers\": 24,\n",
" \"min_length\": 0,\n",
" \"model_type\": \"qwen2\",\n",
" \"no_repeat_ngram_size\": 0,\n",
" \"num_attention_heads\": 14,\n",
" \"num_beam_groups\": 1,\n",
" \"num_beams\": 1,\n",
" \"num_hidden_layers\": 24,\n",
" \"num_key_value_heads\": 2,\n",
" \"num_return_sequences\": 1,\n",
" \"output_attentions\": false,\n",
" \"output_hidden_states\": false,\n",
" \"output_scores\": false,\n",
" \"pad_token_id\": null,\n",
" \"prefix\": null,\n",
" \"problem_type\": null,\n",
" \"pruned_heads\": {},\n",
" \"remove_invalid_values\": false,\n",
" \"repetition_penalty\": 1.0,\n",
" \"return_dict\": true,\n",
" \"return_dict_in_generate\": false,\n",
" \"rms_norm_eps\": 1e-06,\n",
" \"rope_theta\": 1000000.0,\n",
" \"sep_token_id\": null,\n",
" \"sliding_window\": null,\n",
" \"suppress_tokens\": null,\n",
" \"task_specific_params\": null,\n",
" \"temperature\": 1.0,\n",
" \"tf_legacy_loss\": false,\n",
" \"tie_encoder_decoder\": false,\n",
" \"tie_word_embeddings\": true,\n",
" \"tokenizer_class\": null,\n",
" \"top_k\": 50,\n",
" \"top_p\": 1.0,\n",
" \"torch_dtype\": \"bfloat16\",\n",
" \"torchscript\": false,\n",
" \"transformers_version\": \"4.44.0\",\n",
" \"typical_p\": 1.0,\n",
" \"use_bfloat16\": false,\n",
" \"use_cache\": true,\n",
" \"use_sliding_window\": false,\n",
" \"vocab_size\": 151936\n",
" }\n",
"}"
]
},
"execution_count": 268,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"new_model.config"
]
},
{
"cell_type": "code",
"execution_count": 269,
"id": "ce4e50df-fa65-4f94-9212-29d86acc4862",
"metadata": {},
"outputs": [],
"source": [
"# Replace the decoder stack of `new_model` with the reduced layer list.\n",
"new_model.model.layers = new_layers"
]
},
{
"cell_type": "code",
"execution_count": 270,
"id": "b3c58c14-7b51-4a6c-8f98-a36cfd9bca55",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Qwen2Model(\n",
" (embed_tokens): Embedding(151936, 896)\n",
" (layers): ModuleList(\n",
" (0-19): 20 x Qwen2DecoderLayer(\n",
" (self_attn): Qwen2SdpaAttention(\n",
" (q_proj): Linear(in_features=896, out_features=896, bias=True)\n",
" (k_proj): Linear(in_features=896, out_features=128, bias=True)\n",
" (v_proj): Linear(in_features=896, out_features=128, bias=True)\n",
" (o_proj): Linear(in_features=896, out_features=896, bias=False)\n",
" (rotary_emb): Qwen2RotaryEmbedding()\n",
" )\n",
" (mlp): Qwen2MLP(\n",
" (gate_proj): Linear(in_features=896, out_features=4864, bias=False)\n",
" (up_proj): Linear(in_features=896, out_features=4864, bias=False)\n",
" (down_proj): Linear(in_features=4864, out_features=896, bias=False)\n",
" (act_fn): SiLU()\n",
" )\n",
" (input_layernorm): Qwen2RMSNorm((896,), eps=1e-06)\n",
" (post_attention_layernorm): Qwen2RMSNorm((896,), eps=1e-06)\n",
" )\n",
" )\n",
" (norm): Qwen2RMSNorm((896,), eps=1e-06)\n",
")"
]
},
"execution_count": 270,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"new_model.model"
]
},
{
"cell_type": "markdown",
"id": "cf3b611e-5973-492e-9f12-923dc62a7b0c",
"metadata": {},
"source": [
"# Final testing"
]
},
{
"cell_type": "markdown",
"id": "20e81995-b63a-4547-a381-ee35defec98a",
"metadata": {},
"source": [
"#### Generate"
]
},
{
"cell_type": "code",
"execution_count": 274,
"id": "9f9c5e19-c90f-4ae6-aac7-5c68b70f9286",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
"Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"9.55 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)\n"
]
}
],
"source": [
"%%timeit -r1 -n1 \n",
"# Baseline: time the generation of 50 new tokens with the reference `model`.\n",
"outputs = model.generate(**inputs, max_new_tokens=50)"
]
},
{
"cell_type": "code",
"execution_count": 272,
"id": "e6059909-2993-46ea-ad2b-04cf3b95379b",
"metadata": {
"editable": true,
"slideshow": {
"slide_type": ""
},
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
"Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"8.36 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)\n"
]
}
],
"source": [
"%%time\n",
"# `%%time` runs the cell in the notebook namespace, so `outputs` stays available\n",
"# for the decoding cell; `%%timeit` evaluates the body in its own scope and drops it.\n",
"outputs = new_model.generate(**inputs, max_new_tokens=50)"
]
},
{
"cell_type": "markdown",
"id": "13b39e56-c1a5-4deb-9221-8dfa20fdec3f",
"metadata": {},
"source": [
"#### decode the answer"
]
},
{
"cell_type": "code",
"execution_count": 275,
"id": "b5f96806-6ec0-4f74-b78d-7b8bf212c04d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"<|im_start|>user Hola, te llamas Juana<|im_end|>\n",
"<|im_start|>assistant 1. Hola, te llamas Juana\n"
]
}
],
"source": [
"# Decode the first generated sequence back to text and show it.\n",
"text = tokenizer.batch_decode(outputs)[0]\n",
"print(text)"
]
},
{
"cell_type": "markdown",
"id": "aecdb247-ff40-44aa-abb6-9013c7425955",
"metadata": {},
"source": [
"# CLEANUP"
]
},
{
"cell_type": "code",
"execution_count": 276,
"id": "867af9c0-0ec8-42b0-8893-994afaf3f324",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Successfuly deleted: \"model\".\n",
"Successfuly deleted: \"new_model\".\n",
"Successfuly deleted: \"old_config\".\n",
"Successfuly deleted: \"new_config\".\n",
"Successfuly deleted: \"old_layers\".\n",
"Successfuly deleted: \"new_layers\".\n",
"Successfuly deleted: \"tokenizer\".\n"
]
}
],
"source": [
"# Unbind the notebook-level names that hold the models, configs, layers and tokenizer.\n",
"# Names that no longer exist are reported instead of raising.\n",
"var_bin = (\"model\", \"new_model\", \"old_config\", \"new_config\", \"old_layers\", \"new_layers\", \"tokenizer\")\n",
"for variable in var_bin:\n",
"    # Direct namespace access replaces `exec` on a formatted string.\n",
"    if variable in globals():\n",
"        del globals()[variable]\n",
"        print(f'Successfuly deleted: \"{variable}\".')\n",
"    else:\n",
"        print(f'The variable \"{variable}\" was not found.')\n",
"del var_bin"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment