@reddgr
Last active November 25, 2024 18:36

Get the detailed size of a pretrained model downloaded from HuggingFace
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Loading an NLI model and showing some info:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from transformers import AutoModelForSequenceClassification\n",
"model = AutoModelForSequenceClassification.from_pretrained('facebook/bart-large-mnli', clean_up_tokenization_spaces=True)\n",
"print(model.classification_head.out_proj)\n",
"print(model.config.id2label)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Getting the number of parameters and MB size of each layer. <br>\n",
"Source:<br>\n",
"https://stackoverflow.com/a/78659931/23755441"
]
},
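{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sketch of the per-layer computation, following the linked answer (an assumption, not necessarily the exact code that produced the output below): iterate over `model.named_parameters()` and convert each tensor's byte size to MiB by dividing by 2**20."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: report name, parameter count, dtype and MiB size for every parameter tensor.\n",
"for name, param in model.named_parameters():\n",
"    n_params = param.numel()\n",
"    size_mib = n_params * param.element_size() / 2**20  # bytes -> MiB\n",
"    print(f'Layer: {name}; Number of parameters: {n_params:,} ({param.dtype}); Size: {size_mib:.2f} MiB')"
]
},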
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Linear(in_features=1024, out_features=3, bias=True)\n",
"{0: 'contradiction', 1: 'neutral', 2: 'entailment'}\n",
"Layer: model.shared.weight; Number of parameters: 51,471,360 (torch.float32); Size: 196.35 MiB\n",
"Layer: model.encoder.embed_positions.weight; Number of parameters: 1,050,624 (torch.float32); Size: 4.01 MiB\n",
"Layer: model.encoder.layers.0.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.0.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.0.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.0.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.0.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.0.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.0.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.0.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.0.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.0.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.0.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.0.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.encoder.layers.0.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.0.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.0.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.0.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.1.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.1.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.1.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.1.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.1.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.1.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.1.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.1.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.1.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.1.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.1.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.1.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.encoder.layers.1.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.1.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.1.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.1.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.2.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.2.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.2.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.2.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.2.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.2.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.2.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.2.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.2.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.2.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.2.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.2.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.encoder.layers.2.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.2.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.2.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.2.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.3.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.3.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.3.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.3.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.3.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.3.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.3.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.3.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.3.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.3.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.3.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.3.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.encoder.layers.3.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.3.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.3.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.3.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.4.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.4.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.4.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.4.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.4.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.4.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.4.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.4.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.4.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.4.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.4.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.4.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.encoder.layers.4.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.4.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.4.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.4.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.5.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.5.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.5.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.5.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.5.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.5.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.5.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.5.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.5.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.5.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.5.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.5.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.encoder.layers.5.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.5.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.5.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.5.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.6.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.6.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.6.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.6.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.6.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.6.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.6.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.6.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.6.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.6.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.6.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.6.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.encoder.layers.6.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.6.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.6.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.6.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.7.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.7.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.7.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.7.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.7.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.7.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.7.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.7.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.7.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.7.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.7.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.7.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.encoder.layers.7.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.7.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.7.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.7.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.8.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.8.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.8.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.8.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.8.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.8.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.8.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.8.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.8.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.8.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.8.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.8.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.encoder.layers.8.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.8.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.8.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.8.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.9.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.9.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.9.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.9.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.9.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.9.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.9.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.9.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.9.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.9.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.9.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.9.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.encoder.layers.9.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.9.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.9.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.9.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.10.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.10.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.10.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.10.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.10.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.10.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.10.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.10.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.10.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.10.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.10.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.10.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.encoder.layers.10.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.10.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.10.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.10.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.11.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.11.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.11.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.11.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.11.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.11.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.11.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.11.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.11.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.11.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.11.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.11.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.encoder.layers.11.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.11.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.11.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.11.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layernorm_embedding.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layernorm_embedding.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.embed_positions.weight; Number of parameters: 1,050,624 (torch.float32); Size: 4.01 MiB\n",
"Layer: model.decoder.layers.0.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.0.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.0.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.0.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.0.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.encoder_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.0.encoder_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.encoder_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.0.encoder_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.encoder_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.0.encoder_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.encoder_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.0.encoder_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.encoder_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.encoder_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.0.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.decoder.layers.0.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.0.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.1.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.1.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.1.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.1.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.encoder_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.1.encoder_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.encoder_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.1.encoder_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.encoder_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.1.encoder_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.encoder_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.1.encoder_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.encoder_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.encoder_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.1.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.decoder.layers.1.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.1.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.2.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.2.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.2.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.2.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.encoder_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.2.encoder_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.encoder_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.2.encoder_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.encoder_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.2.encoder_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.encoder_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.2.encoder_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.encoder_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.encoder_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.2.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.decoder.layers.2.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.2.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.3.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.3.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.3.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.3.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.encoder_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.3.encoder_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.encoder_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.3.encoder_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.encoder_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.3.encoder_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.encoder_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.3.encoder_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.encoder_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.encoder_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.3.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.decoder.layers.3.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.3.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.4.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.4.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.4.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.4.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.encoder_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.4.encoder_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.encoder_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.4.encoder_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.encoder_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.4.encoder_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.encoder_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.4.encoder_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.encoder_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.encoder_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.4.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.decoder.layers.4.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.4.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.5.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.5.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.5.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.5.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.encoder_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.5.encoder_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.encoder_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.5.encoder_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.encoder_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.5.encoder_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.encoder_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.5.encoder_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.encoder_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.encoder_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.5.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.decoder.layers.5.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.5.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.6.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.6.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.6.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.6.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.encoder_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.6.encoder_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.encoder_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.6.encoder_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.encoder_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.6.encoder_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.encoder_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.6.encoder_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.encoder_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.encoder_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.6.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.decoder.layers.6.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.6.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.7.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.7.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.7.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.7.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.encoder_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.7.encoder_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.encoder_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.7.encoder_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.encoder_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.7.encoder_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.encoder_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.7.encoder_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.encoder_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.encoder_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.7.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.decoder.layers.7.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.7.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.8.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.8.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.8.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.8.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.encoder_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.8.encoder_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.encoder_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.8.encoder_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.encoder_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.8.encoder_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.encoder_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.8.encoder_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.encoder_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.encoder_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.8.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.decoder.layers.8.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.8.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.9.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.9.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.9.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.9.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.encoder_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.9.encoder_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.encoder_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.9.encoder_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.encoder_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.9.encoder_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.encoder_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.9.encoder_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.encoder_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.encoder_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.9.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.decoder.layers.9.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.9.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.10.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.10.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.10.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.10.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.encoder_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.10.encoder_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.encoder_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.10.encoder_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.encoder_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.10.encoder_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.encoder_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.10.encoder_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.encoder_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.encoder_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.10.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.decoder.layers.10.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.10.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.11.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.11.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.11.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.11.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.11.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.11.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.11.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.11.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.11.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.11.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.11.encoder_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.11.encoder_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.11.encoder_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.11.encoder_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.11.encoder_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.11.encoder_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.11.encoder_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.11.encoder_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.11.encoder_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.11.encoder_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.11.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.11.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.decoder.layers.11.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.11.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.11.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.11.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layernorm_embedding.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layernorm_embedding.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: classification_head.dense.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: classification_head.dense.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: classification_head.out_proj.weight; Number of parameters: 3,072 (torch.float32); Size: 0.01 MiB\n",
"Layer: classification_head.out_proj.bias; Number of parameters: 3 (torch.float32); Size: 0.00 MiB\n",
"Total Model Size: 1553.89 MiB\n"
]
}
],
"source": [
"def get_layer_sizes(model):\n",
" layer_sizes = {}\n",
" total_size = 0\n",
"\n",
" for name, param in model.named_parameters():\n",
" layer_size = param.numel() * param.element_size() # numel() returns the number of elements, element_size() returns the size in bytes of each element\n",
" total_size += layer_size\n",
" layer_sizes[name] = (param.numel(), layer_size, param.dtype)\n",
"\n",
" return layer_sizes, total_size\n",
"\n",
"layer_sizes, total_size = get_layer_sizes(model)\n",
"for name, size in layer_sizes.items():\n",
" print(f\"Layer: {name}; Number of parameters: {size[0]:,} ({size[2]}); Size: {size[1] / (1024 ** 2):.2f} MiB\")\n",
"print(f\"Total Model Size: {total_size / (1024 ** 2):.2f} MiB\")"
]
},
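{
"cell_type": "markdown",
"metadata": {},
"source": [
"Follow-up idea (not part of the original snippet): aggregate the per-parameter sizes by top-level module and estimate the footprint at half precision. This is a minimal sketch that reuses the `model` loaded above; `get_module_sizes` is a hypothetical helper, and the float16 estimate simply counts 2 bytes per element."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from collections import defaultdict\n",
"\n",
"def get_module_sizes(model, dtype_bytes=None):\n",
"    # Aggregate parameter sizes (in bytes) by top-level module name.\n",
"    # If dtype_bytes is given (e.g. 2 for float16), estimate the size at that\n",
"    # precision instead of using each parameter's actual element size.\n",
"    module_sizes = defaultdict(int)\n",
"    for name, param in model.named_parameters():\n",
"        top_level = name.split('.')[0]  # e.g. 'model' or 'classification_head'\n",
"        bytes_per_element = dtype_bytes if dtype_bytes is not None else param.element_size()\n",
"        module_sizes[top_level] += param.numel() * bytes_per_element\n",
"    return module_sizes\n",
"\n",
"for module, size in get_module_sizes(model).items():\n",
"    print(f\"{module}: {size / (1024 ** 2):.2f} MiB (as loaded)\")\n",
"for module, size in get_module_sizes(model, dtype_bytes=2).items():\n",
"    print(f\"{module}: {size / (1024 ** 2):.2f} MiB (estimated at float16)\")"
]
}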
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}