Created July 12, 2025 23:20
Comparing evshiron+triton-cpu vs mainline casting (with or without triton-cpu) for converting DeepSeek fp8 safetensors to bf16 GGUF with the ik_llama.cpp/llama.cpp forks.
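For context, the cast both paths implement is DeepSeek's blockwise fp8 dequantization: the checkpoints store float8_e4m3fn weights plus a per-128x128-block scale tensor (weight_scale_inv), and the reference dequant kernel is written in Triton, which is presumably why triton-cpu comes up on CPU-only boxes. A minimal PyTorch sketch of the math (function name and shapes are illustrative, not either converter's actual code):

```python
# Blockwise fp8 (e4m3) -> bf16 dequantization as stored in DeepSeek-style
# checkpoints: each 128x128 tile of the weight is multiplied by one entry
# of the stored scale tensor. Needs a PyTorch recent enough to have native
# float8_e4m3fn support. Illustrative sketch only.
import torch

BLOCK = 128  # DeepSeek-V3/R1 scaling block size

def dequant_fp8_to_bf16(weight: torch.Tensor, scale_inv: torch.Tensor) -> torch.Tensor:
    # weight:    (M, N) float8_e4m3fn
    # scale_inv: (ceil(M/128), ceil(N/128)) float32, one scale per tile
    m, n = weight.shape
    scales = (scale_inv
              .repeat_interleave(BLOCK, dim=0)[:m]
              .repeat_interleave(BLOCK, dim=1)[:, :n])
    return (weight.to(torch.float32) * scales).to(torch.bfloat16)
```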
```
* File is LITTLE endian, script is running on a LITTLE endian host.
* Dumping 51 key/value pair(s)
1: UINT32 | 1 | GGUF.version = 3
2: UINT64 | 1 | GGUF.tensor_count = 79
3: UINT64 | 1 | GGUF.kv_count = 48
4: STRING | 1 | general.architecture = 'deepseek2'
5: STRING | 1 | general.type = 'model'
6: STRING | 1 | general.name = 'DeepSeek R1 0528'
7: STRING | 1 | general.version = '0528'
8: STRING | 1 | general.basename = 'DeepSeek-R1'
9: STRING | 1 | general.size_label = '256x21B'
10: UINT32 | 1 | deepseek2.block_count = 61
11: UINT32 | 1 | deepseek2.context_length = 163840
12: UINT32 | 1 | deepseek2.embedding_length = 7168
13: UINT32 | 1 | deepseek2.feed_forward_length = 18432
14: UINT32 | 1 | deepseek2.attention.head_count = 128
15: UINT32 | 1 | deepseek2.attention.head_count_kv = 128
16: FLOAT32 | 1 | deepseek2.rope.freq_base = 10000.0
17: FLOAT32 | 1 | deepseek2.attention.layer_norm_rms_epsilon = 9.999999974752427e-07
18: UINT32 | 1 | deepseek2.expert_used_count = 8
19: UINT32 | 1 | general.file_type = 32
20: UINT32 | 1 | deepseek2.leading_dense_block_count = 3
21: UINT32 | 1 | deepseek2.vocab_size = 129280
22: UINT32 | 1 | deepseek2.attention.q_lora_rank = 1536
23: UINT32 | 1 | deepseek2.attention.kv_lora_rank = 512
24: UINT32 | 1 | deepseek2.attention.key_length = 192
25: UINT32 | 1 | deepseek2.attention.value_length = 128
26: UINT32 | 1 | deepseek2.expert_feed_forward_length = 2048
27: UINT32 | 1 | deepseek2.expert_count = 256
28: UINT32 | 1 | deepseek2.expert_shared_count = 1
29: FLOAT32 | 1 | deepseek2.expert_weights_scale = 2.5
30: BOOL | 1 | deepseek2.expert_weights_norm = True
31: UINT32 | 1 | deepseek2.expert_gating_func = 2
32: UINT32 | 1 | deepseek2.rope.dimension_count = 64
33: STRING | 1 | deepseek2.rope.scaling.type = 'yarn'
34: FLOAT32 | 1 | deepseek2.rope.scaling.factor = 40.0
35: UINT32 | 1 | deepseek2.rope.scaling.original_context_length = 4096
36: FLOAT32 | 1 | deepseek2.rope.scaling.yarn_log_multiplier = 0.10000000149011612
37: STRING | 1 | tokenizer.ggml.model = 'gpt2'
38: STRING | 1 | tokenizer.ggml.pre = 'deepseek-v3'
39: [STRING] | 129280 | tokenizer.ggml.tokens
40: [INT32] | 129280 | tokenizer.ggml.token_type
41: [STRING] | 127741 | tokenizer.ggml.merges
42: UINT32 | 1 | tokenizer.ggml.bos_token_id = 0
43: UINT32 | 1 | tokenizer.ggml.eos_token_id = 1
44: UINT32 | 1 | tokenizer.ggml.padding_token_id = 1
45: BOOL | 1 | tokenizer.ggml.add_bos_token = True
46: BOOL | 1 | tokenizer.ggml.add_eos_token = False
47: STRING | 1 | tokenizer.chat_template = '{% if not add_generation_prompt is defined %}{% set add_gene'
48: UINT32 | 1 | general.quantization_version = 2
49: UINT16 | 1 | split.no = 0
50: UINT16 | 1 | split.count = 30
51: INT32 | 1 | split.tensors.count = 1147
* Dumping 79 tensor(s)
1: 926679040 | 7168, 129280, 1, 1 | BF16 | token_embd.weight
2: 7168 | 7168, 1, 1, 1 | F32 | blk.0.attn_norm.weight
3: 132120576 | 18432, 7168, 1, 1 | BF16 | blk.0.ffn_down.weight
4: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.0.ffn_gate.weight
5: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.0.ffn_up.weight
6: 7168 | 7168, 1, 1, 1 | F32 | blk.0.ffn_norm.weight
7: 512 | 512, 1, 1, 1 | F32 | blk.0.attn_kv_a_norm.weight
8: 4128768 | 7168, 576, 1, 1 | BF16 | blk.0.attn_kv_a_mqa.weight
9: 16777216 | 512, 32768, 1, 1 | BF16 | blk.0.attn_kv_b.weight
10: 8388608 | 128, 65536, 1, 1 | BF16 | blk.0.attn_k_b.weight
11: 8388608 | 512, 16384, 1, 1 | BF16 | blk.0.attn_v_b.weight
12: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.0.attn_output.weight
13: 1536 | 1536, 1, 1, 1 | F32 | blk.0.attn_q_a_norm.weight
14: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.0.attn_q_a.weight
15: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.0.attn_q_b.weight
16: 7168 | 7168, 1, 1, 1 | F32 | blk.1.attn_norm.weight
17: 132120576 | 18432, 7168, 1, 1 | BF16 | blk.1.ffn_down.weight
18: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.1.ffn_gate.weight
19: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.1.ffn_up.weight
20: 7168 | 7168, 1, 1, 1 | F32 | blk.1.ffn_norm.weight
21: 512 | 512, 1, 1, 1 | F32 | blk.1.attn_kv_a_norm.weight
22: 4128768 | 7168, 576, 1, 1 | BF16 | blk.1.attn_kv_a_mqa.weight
23: 16777216 | 512, 32768, 1, 1 | BF16 | blk.1.attn_kv_b.weight
24: 8388608 | 128, 65536, 1, 1 | BF16 | blk.1.attn_k_b.weight
25: 8388608 | 512, 16384, 1, 1 | BF16 | blk.1.attn_v_b.weight
26: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.1.attn_output.weight
27: 1536 | 1536, 1, 1, 1 | F32 | blk.1.attn_q_a_norm.weight
28: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.1.attn_q_a.weight
29: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.1.attn_q_b.weight
30: 7168 | 7168, 1, 1, 1 | F32 | blk.2.attn_norm.weight
31: 132120576 | 18432, 7168, 1, 1 | BF16 | blk.2.ffn_down.weight
32: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.2.ffn_gate.weight
33: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.2.ffn_up.weight
34: 7168 | 7168, 1, 1, 1 | F32 | blk.2.ffn_norm.weight
35: 512 | 512, 1, 1, 1 | F32 | blk.2.attn_kv_a_norm.weight
36: 4128768 | 7168, 576, 1, 1 | BF16 | blk.2.attn_kv_a_mqa.weight
37: 16777216 | 512, 32768, 1, 1 | BF16 | blk.2.attn_kv_b.weight
38: 8388608 | 128, 65536, 1, 1 | BF16 | blk.2.attn_k_b.weight
39: 8388608 | 512, 16384, 1, 1 | BF16 | blk.2.attn_v_b.weight
40: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.2.attn_output.weight
41: 1536 | 1536, 1, 1, 1 | F32 | blk.2.attn_q_a_norm.weight
42: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.2.attn_q_a.weight
43: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.2.attn_q_b.weight
44: 256 | 256, 1, 1, 1 | F32 | blk.3.exp_probs_b.bias
45: 1835008 | 7168, 256, 1, 1 | F32 | blk.3.ffn_gate_inp.weight
46: 14680064 | 2048, 7168, 1, 1 | BF16 | blk.3.ffn_down_shexp.weight
47: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.3.ffn_gate_shexp.weight
48: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.3.ffn_up_shexp.weight
49: 512 | 512, 1, 1, 1 | F32 | blk.3.attn_kv_a_norm.weight
50: 4128768 | 7168, 576, 1, 1 | BF16 | blk.3.attn_kv_a_mqa.weight
51: 16777216 | 512, 32768, 1, 1 | BF16 | blk.3.attn_kv_b.weight
52: 8388608 | 128, 65536, 1, 1 | BF16 | blk.3.attn_k_b.weight
53: 8388608 | 512, 16384, 1, 1 | BF16 | blk.3.attn_v_b.weight
54: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.3.attn_output.weight
55: 1536 | 1536, 1, 1, 1 | F32 | blk.3.attn_q_a_norm.weight
56: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.3.attn_q_a.weight
57: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.3.attn_q_b.weight
58: 7168 | 7168, 1, 1, 1 | F32 | blk.3.attn_norm.weight
59: 3758096384 | 2048, 7168, 256, 1 | BF16 | blk.3.ffn_down_exps.weight
60: 3758096384 | 7168, 2048, 256, 1 | BF16 | blk.3.ffn_gate_exps.weight
61: 3758096384 | 7168, 2048, 256, 1 | BF16 | blk.3.ffn_up_exps.weight
62: 7168 | 7168, 1, 1, 1 | F32 | blk.3.ffn_norm.weight
63: 256 | 256, 1, 1, 1 | F32 | blk.4.exp_probs_b.bias
64: 1835008 | 7168, 256, 1, 1 | F32 | blk.4.ffn_gate_inp.weight
65: 14680064 | 2048, 7168, 1, 1 | BF16 | blk.4.ffn_down_shexp.weight
66: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.4.ffn_gate_shexp.weight
67: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.4.ffn_up_shexp.weight
68: 512 | 512, 1, 1, 1 | F32 | blk.4.attn_kv_a_norm.weight
69: 4128768 | 7168, 576, 1, 1 | BF16 | blk.4.attn_kv_a_mqa.weight
70: 16777216 | 512, 32768, 1, 1 | BF16 | blk.4.attn_kv_b.weight
71: 8388608 | 128, 65536, 1, 1 | BF16 | blk.4.attn_k_b.weight
72: 8388608 | 512, 16384, 1, 1 | BF16 | blk.4.attn_v_b.weight
73: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.4.attn_output.weight
74: 1536 | 1536, 1, 1, 1 | F32 | blk.4.attn_q_a_norm.weight
75: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.4.attn_q_a.weight
76: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.4.attn_q_b.weight
77: 7168 | 7168, 1, 1, 1 | F32 | blk.4.attn_norm.weight
78: 3758096384 | 2048, 7168, 256, 1 | BF16 | blk.4.ffn_down_exps.weight
79: 3758096384 | 7168, 2048, 256, 1 | BF16 | blk.4.ffn_gate_exps.weight
```
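The second listing below, presumably the other casting path's output, is identical to the first. Matching metadata and shapes is necessary but not sufficient; to check that the tensor payloads themselves agree, one option is to hash each tensor with the gguf Python package (gguf-py, shipped with llama.cpp). A minimal sketch, with placeholder file names:

```python
# Hash every tensor payload in two GGUF shards and report mismatches.
# Requires the `gguf` package from llama.cpp's gguf-py; the file names
# below are placeholders for the two converters' outputs.
import hashlib
from gguf import GGUFReader

def tensor_digests(path: str) -> dict[str, str]:
    return {t.name: hashlib.sha256(t.data.tobytes()).hexdigest()
            for t in GGUFReader(path).tensors}

a = tensor_digests("evshiron-triton-cpu-00001-of-00030.gguf")  # placeholder
b = tensor_digests("mainline-00001-of-00030.gguf")             # placeholder
for name in sorted(a.keys() | b.keys()):
    if a.get(name) != b.get(name):
        print("MISMATCH:", name)
print("identical" if a == b else "outputs differ")
```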
```
* File is LITTLE endian, script is running on a LITTLE endian host.
* Dumping 51 key/value pair(s)
1: UINT32 | 1 | GGUF.version = 3
2: UINT64 | 1 | GGUF.tensor_count = 79
3: UINT64 | 1 | GGUF.kv_count = 48
4: STRING | 1 | general.architecture = 'deepseek2'
5: STRING | 1 | general.type = 'model'
6: STRING | 1 | general.name = 'DeepSeek R1 0528'
7: STRING | 1 | general.version = '0528'
8: STRING | 1 | general.basename = 'DeepSeek-R1'
9: STRING | 1 | general.size_label = '256x21B'
10: UINT32 | 1 | deepseek2.block_count = 61
11: UINT32 | 1 | deepseek2.context_length = 163840
12: UINT32 | 1 | deepseek2.embedding_length = 7168
13: UINT32 | 1 | deepseek2.feed_forward_length = 18432
14: UINT32 | 1 | deepseek2.attention.head_count = 128
15: UINT32 | 1 | deepseek2.attention.head_count_kv = 128
16: FLOAT32 | 1 | deepseek2.rope.freq_base = 10000.0
17: FLOAT32 | 1 | deepseek2.attention.layer_norm_rms_epsilon = 9.999999974752427e-07
18: UINT32 | 1 | deepseek2.expert_used_count = 8
19: UINT32 | 1 | general.file_type = 32
20: UINT32 | 1 | deepseek2.leading_dense_block_count = 3
21: UINT32 | 1 | deepseek2.vocab_size = 129280
22: UINT32 | 1 | deepseek2.attention.q_lora_rank = 1536
23: UINT32 | 1 | deepseek2.attention.kv_lora_rank = 512
24: UINT32 | 1 | deepseek2.attention.key_length = 192
25: UINT32 | 1 | deepseek2.attention.value_length = 128
26: UINT32 | 1 | deepseek2.expert_feed_forward_length = 2048
27: UINT32 | 1 | deepseek2.expert_count = 256
28: UINT32 | 1 | deepseek2.expert_shared_count = 1
29: FLOAT32 | 1 | deepseek2.expert_weights_scale = 2.5
30: BOOL | 1 | deepseek2.expert_weights_norm = True
31: UINT32 | 1 | deepseek2.expert_gating_func = 2
32: UINT32 | 1 | deepseek2.rope.dimension_count = 64
33: STRING | 1 | deepseek2.rope.scaling.type = 'yarn'
34: FLOAT32 | 1 | deepseek2.rope.scaling.factor = 40.0
35: UINT32 | 1 | deepseek2.rope.scaling.original_context_length = 4096
36: FLOAT32 | 1 | deepseek2.rope.scaling.yarn_log_multiplier = 0.10000000149011612
37: STRING | 1 | tokenizer.ggml.model = 'gpt2'
38: STRING | 1 | tokenizer.ggml.pre = 'deepseek-v3'
39: [STRING] | 129280 | tokenizer.ggml.tokens
40: [INT32] | 129280 | tokenizer.ggml.token_type
41: [STRING] | 127741 | tokenizer.ggml.merges
42: UINT32 | 1 | tokenizer.ggml.bos_token_id = 0
43: UINT32 | 1 | tokenizer.ggml.eos_token_id = 1
44: UINT32 | 1 | tokenizer.ggml.padding_token_id = 1
45: BOOL | 1 | tokenizer.ggml.add_bos_token = True
46: BOOL | 1 | tokenizer.ggml.add_eos_token = False
47: STRING | 1 | tokenizer.chat_template = '{% if not add_generation_prompt is defined %}{% set add_gene'
48: UINT32 | 1 | general.quantization_version = 2
49: UINT16 | 1 | split.no = 0
50: UINT16 | 1 | split.count = 30
51: INT32 | 1 | split.tensors.count = 1147
* Dumping 79 tensor(s)
1: 926679040 | 7168, 129280, 1, 1 | BF16 | token_embd.weight
2: 7168 | 7168, 1, 1, 1 | F32 | blk.0.attn_norm.weight
3: 132120576 | 18432, 7168, 1, 1 | BF16 | blk.0.ffn_down.weight
4: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.0.ffn_gate.weight
5: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.0.ffn_up.weight
6: 7168 | 7168, 1, 1, 1 | F32 | blk.0.ffn_norm.weight
7: 512 | 512, 1, 1, 1 | F32 | blk.0.attn_kv_a_norm.weight
8: 4128768 | 7168, 576, 1, 1 | BF16 | blk.0.attn_kv_a_mqa.weight
9: 16777216 | 512, 32768, 1, 1 | BF16 | blk.0.attn_kv_b.weight
10: 8388608 | 128, 65536, 1, 1 | BF16 | blk.0.attn_k_b.weight
11: 8388608 | 512, 16384, 1, 1 | BF16 | blk.0.attn_v_b.weight
12: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.0.attn_output.weight
13: 1536 | 1536, 1, 1, 1 | F32 | blk.0.attn_q_a_norm.weight
14: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.0.attn_q_a.weight
15: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.0.attn_q_b.weight
16: 7168 | 7168, 1, 1, 1 | F32 | blk.1.attn_norm.weight
17: 132120576 | 18432, 7168, 1, 1 | BF16 | blk.1.ffn_down.weight
18: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.1.ffn_gate.weight
19: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.1.ffn_up.weight
20: 7168 | 7168, 1, 1, 1 | F32 | blk.1.ffn_norm.weight
21: 512 | 512, 1, 1, 1 | F32 | blk.1.attn_kv_a_norm.weight
22: 4128768 | 7168, 576, 1, 1 | BF16 | blk.1.attn_kv_a_mqa.weight
23: 16777216 | 512, 32768, 1, 1 | BF16 | blk.1.attn_kv_b.weight
24: 8388608 | 128, 65536, 1, 1 | BF16 | blk.1.attn_k_b.weight
25: 8388608 | 512, 16384, 1, 1 | BF16 | blk.1.attn_v_b.weight
26: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.1.attn_output.weight
27: 1536 | 1536, 1, 1, 1 | F32 | blk.1.attn_q_a_norm.weight
28: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.1.attn_q_a.weight
29: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.1.attn_q_b.weight
30: 7168 | 7168, 1, 1, 1 | F32 | blk.2.attn_norm.weight
31: 132120576 | 18432, 7168, 1, 1 | BF16 | blk.2.ffn_down.weight
32: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.2.ffn_gate.weight
33: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.2.ffn_up.weight
34: 7168 | 7168, 1, 1, 1 | F32 | blk.2.ffn_norm.weight
35: 512 | 512, 1, 1, 1 | F32 | blk.2.attn_kv_a_norm.weight
36: 4128768 | 7168, 576, 1, 1 | BF16 | blk.2.attn_kv_a_mqa.weight
37: 16777216 | 512, 32768, 1, 1 | BF16 | blk.2.attn_kv_b.weight
38: 8388608 | 128, 65536, 1, 1 | BF16 | blk.2.attn_k_b.weight
39: 8388608 | 512, 16384, 1, 1 | BF16 | blk.2.attn_v_b.weight
40: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.2.attn_output.weight
41: 1536 | 1536, 1, 1, 1 | F32 | blk.2.attn_q_a_norm.weight
42: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.2.attn_q_a.weight
43: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.2.attn_q_b.weight
44: 256 | 256, 1, 1, 1 | F32 | blk.3.exp_probs_b.bias
45: 1835008 | 7168, 256, 1, 1 | F32 | blk.3.ffn_gate_inp.weight
46: 14680064 | 2048, 7168, 1, 1 | BF16 | blk.3.ffn_down_shexp.weight
47: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.3.ffn_gate_shexp.weight
48: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.3.ffn_up_shexp.weight
49: 512 | 512, 1, 1, 1 | F32 | blk.3.attn_kv_a_norm.weight
50: 4128768 | 7168, 576, 1, 1 | BF16 | blk.3.attn_kv_a_mqa.weight
51: 16777216 | 512, 32768, 1, 1 | BF16 | blk.3.attn_kv_b.weight
52: 8388608 | 128, 65536, 1, 1 | BF16 | blk.3.attn_k_b.weight
53: 8388608 | 512, 16384, 1, 1 | BF16 | blk.3.attn_v_b.weight
54: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.3.attn_output.weight
55: 1536 | 1536, 1, 1, 1 | F32 | blk.3.attn_q_a_norm.weight
56: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.3.attn_q_a.weight
57: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.3.attn_q_b.weight
58: 7168 | 7168, 1, 1, 1 | F32 | blk.3.attn_norm.weight
59: 3758096384 | 2048, 7168, 256, 1 | BF16 | blk.3.ffn_down_exps.weight
60: 3758096384 | 7168, 2048, 256, 1 | BF16 | blk.3.ffn_gate_exps.weight
61: 3758096384 | 7168, 2048, 256, 1 | BF16 | blk.3.ffn_up_exps.weight
62: 7168 | 7168, 1, 1, 1 | F32 | blk.3.ffn_norm.weight
63: 256 | 256, 1, 1, 1 | F32 | blk.4.exp_probs_b.bias
64: 1835008 | 7168, 256, 1, 1 | F32 | blk.4.ffn_gate_inp.weight
65: 14680064 | 2048, 7168, 1, 1 | BF16 | blk.4.ffn_down_shexp.weight
66: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.4.ffn_gate_shexp.weight
67: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.4.ffn_up_shexp.weight
68: 512 | 512, 1, 1, 1 | F32 | blk.4.attn_kv_a_norm.weight
69: 4128768 | 7168, 576, 1, 1 | BF16 | blk.4.attn_kv_a_mqa.weight
70: 16777216 | 512, 32768, 1, 1 | BF16 | blk.4.attn_kv_b.weight
71: 8388608 | 128, 65536, 1, 1 | BF16 | blk.4.attn_k_b.weight
72: 8388608 | 512, 16384, 1, 1 | BF16 | blk.4.attn_v_b.weight
73: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.4.attn_output.weight
74: 1536 | 1536, 1, 1, 1 | F32 | blk.4.attn_q_a_norm.weight
75: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.4.attn_q_a.weight
76: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.4.attn_q_b.weight
77: 7168 | 7168, 1, 1, 1 | F32 | blk.4.attn_norm.weight
78: 3758096384 | 2048, 7168, 256, 1 | BF16 | blk.4.ffn_down_exps.weight
79: 3758096384 | 7168, 2048, 256, 1 | BF16 | blk.4.ffn_gate_exps.weight
```
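With split.count = 30 and split.tensors.count = 1147 in the header, one last sanity check on a finished conversion is to walk every shard and confirm the per-shard tensor counts sum to 1147. A sketch, again with a placeholder naming scheme:

```python
# Count tensors across all 30 split files and compare against the
# split.tensors.count advertised in the GGUF header (1147 here).
from gguf import GGUFReader

total = sum(len(GGUFReader(f"DeepSeek-R1-0528-BF16-{i:05d}-of-00030.gguf").tensors)  # placeholder names
            for i in range(1, 31))
print(total, "tensors; header says 1147")
```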