Get the detailed size of a pretrained model downloaded from HuggingFace
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Loading an NLI model and showing some info:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from transformers import AutoModelForSequenceClassification\n",
"model = AutoModelForSequenceClassification.from_pretrained('facebook/bart-large-mnli', clean_up_tokenization_spaces=True)\n",
"print(model.classification_head.out_proj)\n",
"print(model.config.id2label)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Getting the number of parameters and size (in MiB) of each layer. <br>\n",
"Source:<br>\n",
"https://stackoverflow.com/a/78659931/23755441"
]
},
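{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note: the source of the next cell is cut off in this capture. The cell below is a minimal reconstruction sketch of the loop that produces the output that follows, inferred from the output format and the linked Stack Overflow answer; variable names such as `total_params` and `size_mib` are illustrative, not from the original."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Reconstruction sketch (not the original cell): iterate over the model's\n",
"# named parameters and report each tensor's element count, dtype, and size.\n",
"total_params = 0\n",
"total_mib = 0.0\n",
"for name, param in model.named_parameters():\n",
"    # bytes = number of elements * bytes per element; 1 MiB = 1024**2 bytes\n",
"    size_mib = param.nelement() * param.element_size() / 1024**2\n",
"    total_params += param.nelement()\n",
"    total_mib += size_mib\n",
"    print(f'Layer: {name}; Number of parameters: {param.nelement():,} '\n",
"          f'({param.dtype}); Size: {size_mib:.2f} MiB')\n",
"print(f'Total: {total_params:,} parameters; {total_mib:.2f} MiB')"
]
},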
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Linear(in_features=1024, out_features=3, bias=True)\n",
"{0: 'contradiction', 1: 'neutral', 2: 'entailment'}\n",
"Layer: model.shared.weight; Number of parameters: 51,471,360 (torch.float32); Size: 196.35 MiB\n",
"Layer: model.encoder.embed_positions.weight; Number of parameters: 1,050,624 (torch.float32); Size: 4.01 MiB\n",
"Layer: model.encoder.layers.0.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.0.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.0.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.0.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.0.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.0.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.0.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.0.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.0.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.0.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.0.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.0.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.encoder.layers.0.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.0.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.0.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.0.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.1.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.1.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.1.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.1.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.1.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.1.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.1.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.1.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.1.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.1.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.1.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.1.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.encoder.layers.1.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.1.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.1.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.1.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.2.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.2.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.2.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.2.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.2.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.2.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.2.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.2.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.2.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.2.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.2.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.2.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.encoder.layers.2.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.2.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.2.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.2.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.3.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.3.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.3.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.3.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.3.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.3.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.3.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.3.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.3.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.3.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.3.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.3.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.encoder.layers.3.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.3.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.3.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.3.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.4.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.4.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.4.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.4.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.4.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.4.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.4.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.4.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.4.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.4.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.4.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.4.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.encoder.layers.4.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.4.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.4.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.4.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.5.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.5.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.5.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.5.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.5.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.5.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.5.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.5.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.5.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.5.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.5.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.5.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.encoder.layers.5.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.5.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.5.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.5.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.6.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.6.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.6.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.6.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.6.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.6.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.6.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.6.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.6.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.6.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.6.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.6.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.encoder.layers.6.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.6.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.6.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.6.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.7.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.7.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.7.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.7.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.7.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.7.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.7.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.7.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.7.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.7.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.7.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.7.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.encoder.layers.7.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.7.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.7.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.7.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.8.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.8.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.8.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.8.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.8.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.8.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.8.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.8.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.8.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.8.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.8.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.8.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.encoder.layers.8.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.8.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.8.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.8.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.9.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.9.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.9.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.9.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.9.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.9.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.9.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.9.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.9.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.9.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.9.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.9.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.encoder.layers.9.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.9.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.9.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.9.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.10.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.10.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.10.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.10.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.10.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.10.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.10.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.10.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.10.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.10.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.10.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.10.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.encoder.layers.10.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.10.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.10.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.10.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.11.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.11.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.11.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.11.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.11.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.11.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.11.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.encoder.layers.11.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.11.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.11.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.11.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.11.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.encoder.layers.11.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.encoder.layers.11.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.11.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layers.11.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layernorm_embedding.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.encoder.layernorm_embedding.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.embed_positions.weight; Number of parameters: 1,050,624 (torch.float32); Size: 4.01 MiB\n",
"Layer: model.decoder.layers.0.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.0.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.0.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.0.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.0.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.encoder_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.0.encoder_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.encoder_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.0.encoder_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.encoder_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.0.encoder_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.encoder_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.0.encoder_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.encoder_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.encoder_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.0.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.decoder.layers.0.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.0.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.0.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.1.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.1.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.1.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.1.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.encoder_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.1.encoder_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.encoder_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.1.encoder_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.encoder_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.1.encoder_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.encoder_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.1.encoder_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.encoder_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.encoder_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.1.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.decoder.layers.1.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.1.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.1.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.2.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.2.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.2.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.2.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.encoder_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.2.encoder_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.encoder_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.2.encoder_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.encoder_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.2.encoder_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.encoder_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.2.encoder_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.encoder_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.encoder_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.2.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.decoder.layers.2.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.2.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.2.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.3.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.3.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.3.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.3.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.encoder_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.3.encoder_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.encoder_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.3.encoder_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.encoder_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.3.encoder_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.encoder_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.3.encoder_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.encoder_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.encoder_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.3.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.decoder.layers.3.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.3.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.3.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.4.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.4.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.4.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.4.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.encoder_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.4.encoder_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.encoder_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.4.encoder_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.encoder_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.4.encoder_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.encoder_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.4.encoder_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.encoder_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.encoder_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.4.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.decoder.layers.4.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.4.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.4.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.5.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.5.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.5.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.5.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.encoder_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.5.encoder_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.encoder_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.5.encoder_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.encoder_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.5.encoder_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.encoder_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.5.encoder_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.encoder_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.encoder_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.5.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.decoder.layers.5.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.5.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.5.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.6.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.6.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.6.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.6.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.encoder_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.6.encoder_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.encoder_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.6.encoder_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.encoder_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.6.encoder_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.encoder_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.6.encoder_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.encoder_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.encoder_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.6.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.decoder.layers.6.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.6.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.6.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.7.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.7.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.7.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.7.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.encoder_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.7.encoder_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.encoder_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.7.encoder_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.encoder_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.7.encoder_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.encoder_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.7.encoder_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.encoder_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.encoder_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.7.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.decoder.layers.7.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.7.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.7.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.8.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.8.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.8.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.8.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.encoder_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.8.encoder_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.encoder_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.8.encoder_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.encoder_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.8.encoder_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.encoder_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.8.encoder_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.encoder_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.encoder_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.8.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.decoder.layers.8.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.8.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.8.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.9.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.9.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.9.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.9.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.encoder_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.9.encoder_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.encoder_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.9.encoder_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.encoder_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.9.encoder_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.encoder_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.9.encoder_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.encoder_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.encoder_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.9.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n",
"Layer: model.decoder.layers.9.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n",
"Layer: model.decoder.layers.9.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.9.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.10.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.10.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.10.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.10.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.encoder_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.10.encoder_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.encoder_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.10.encoder_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.encoder_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.10.encoder_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.encoder_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n",
"Layer: model.decoder.layers.10.encoder_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.encoder_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.encoder_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n",
"Layer: model.decoder.layers.10.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n", | |
"Layer: model.decoder.layers.10.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n", | |
"Layer: model.decoder.layers.10.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n", | |
"Layer: model.decoder.layers.10.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n", | |
"Layer: model.decoder.layers.10.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n", | |
"Layer: model.decoder.layers.10.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n", | |
"Layer: model.decoder.layers.11.self_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n", | |
"Layer: model.decoder.layers.11.self_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n", | |
"Layer: model.decoder.layers.11.self_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n", | |
"Layer: model.decoder.layers.11.self_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n", | |
"Layer: model.decoder.layers.11.self_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n", | |
"Layer: model.decoder.layers.11.self_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n", | |
"Layer: model.decoder.layers.11.self_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n", | |
"Layer: model.decoder.layers.11.self_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n", | |
"Layer: model.decoder.layers.11.self_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n", | |
"Layer: model.decoder.layers.11.self_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n", | |
"Layer: model.decoder.layers.11.encoder_attn.k_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n", | |
"Layer: model.decoder.layers.11.encoder_attn.k_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n", | |
"Layer: model.decoder.layers.11.encoder_attn.v_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n", | |
"Layer: model.decoder.layers.11.encoder_attn.v_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n", | |
"Layer: model.decoder.layers.11.encoder_attn.q_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n", | |
"Layer: model.decoder.layers.11.encoder_attn.q_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n", | |
"Layer: model.decoder.layers.11.encoder_attn.out_proj.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n", | |
"Layer: model.decoder.layers.11.encoder_attn.out_proj.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n", | |
"Layer: model.decoder.layers.11.encoder_attn_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n", | |
"Layer: model.decoder.layers.11.encoder_attn_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n", | |
"Layer: model.decoder.layers.11.fc1.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n", | |
"Layer: model.decoder.layers.11.fc1.bias; Number of parameters: 4,096 (torch.float32); Size: 0.02 MiB\n", | |
"Layer: model.decoder.layers.11.fc2.weight; Number of parameters: 4,194,304 (torch.float32); Size: 16.00 MiB\n", | |
"Layer: model.decoder.layers.11.fc2.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n", | |
"Layer: model.decoder.layers.11.final_layer_norm.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n", | |
"Layer: model.decoder.layers.11.final_layer_norm.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n", | |
"Layer: model.decoder.layernorm_embedding.weight; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n", | |
"Layer: model.decoder.layernorm_embedding.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n", | |
"Layer: classification_head.dense.weight; Number of parameters: 1,048,576 (torch.float32); Size: 4.00 MiB\n", | |
"Layer: classification_head.dense.bias; Number of parameters: 1,024 (torch.float32); Size: 0.00 MiB\n", | |
"Layer: classification_head.out_proj.weight; Number of parameters: 3,072 (torch.float32); Size: 0.01 MiB\n", | |
"Layer: classification_head.out_proj.bias; Number of parameters: 3 (torch.float32); Size: 0.00 MiB\n", | |
"Total Model Size: 1553.89 MiB\n" | |
]
}
],
"source": [
"def get_layer_sizes(model):\n", | |
" layer_sizes = {}\n", | |
" total_size = 0\n", | |
"\n", | |
" for name, param in model.named_parameters():\n", | |
" layer_size = param.numel() * param.element_size() # numel() returns the number of elements, element_size() returns the size in bytes of each element\n", | |
" total_size += layer_size\n", | |
" layer_sizes[name] = (param.numel(), layer_size, param.dtype)\n", | |
"\n", | |
" return layer_sizes, total_size\n", | |
"\n", | |
"layer_sizes, total_size = get_layer_sizes(model)\n", | |
"for name, size in layer_sizes.items():\n", | |
" print(f\"Layer: {name}; Number of parameters: {size[0]:,} ({size[2]}); Size: {size[1] / (1024 ** 2):.2f} MiB\")\n", | |
"print(f\"Total Model Size: {total_size / (1024 ** 2):.2f} MiB\")" | |
] | |
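},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The 1553.89 MiB total is consistent with roughly 407 million float32 parameters at 4 bytes each. As an optional follow-up sketch (not part of the original recipe; it assumes the `model` and `get_layer_sizes` defined above), we can aggregate the per-parameter sizes by top-level module and estimate the footprint if the weights were cast to float16:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: group the per-parameter sizes by top-level module and estimate a\n",
"# float16 footprint. Assumes `model` and `get_layer_sizes` from the cells above.\n",
"from collections import defaultdict\n",
"\n",
"layer_sizes, total_size = get_layer_sizes(model)\n",
"\n",
"module_sizes = defaultdict(int)\n",
"for name, (numel, size_bytes, dtype) in layer_sizes.items():\n",
"    top_level = name.split('.')[0]  # e.g. 'model' or 'classification_head'\n",
"    module_sizes[top_level] += size_bytes\n",
"\n",
"for module, size_bytes in module_sizes.items():\n",
"    print(f\"{module}: {size_bytes / (1024 ** 2):.2f} MiB\")\n",
"\n",
"# float16 stores 2 bytes per element instead of float32's 4, so halve the total\n",
"print(f\"Estimated float16 size: {total_size / 2 / (1024 ** 2):.2f} MiB\")"
]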
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
} |