Created September 12, 2023 19:50
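The transcript below captures llama.cpp's ./main (a gguf-llama checkout, build 1215) loading a 70B Q4_0 GGUF model on a single NVIDIA A10; the [ 8192, 1024 ] attn_k/attn_v shapes in the tensor list reflect the model's grouped-query attention. For reference, a minimal sketch of reproducing the same run from Python instead of the CLI, assuming the separate llama-cpp-python bindings are installed; the parameter mapping (-c -> n_ctx, -n -> max_tokens) is inferred from the command line in the log, not part of the original gist:

# Hypothetical reproduction via llama-cpp-python; not part of the original log.
from llama_cpp import Llama

llm = Llama(
    model_path="/workspace/spicyboros-70B-2.2.Q4_0.gguf",  # same GGUF file as the ./main run
    n_ctx=4096,                                            # matches -c 4096
)
out = llm(
    "A chat.\nUSER: Write a story about llamas\nASSISTANT:",  # matches -p
    max_tokens=128,                                           # matches -n 128
)
print(out["choices"][0]["text"])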
(pytorch2) ubuntu@a10:/workspace/git/gguf-llama (master ✔) ᐅ ./main -m /workspace/spicyboros-70B-2.2.Q4_0.gguf -c 4096 -p "A chat.\nUSER: Write a story about llamas\nASSISTANT:" -n 128
Log start
main: build = 1215 (89e8959)
main: seed = 1694547445
ggml_init_cublas: found 1 CUDA devices:
  Device 0: NVIDIA A10, compute capability 8.6
llama_model_loader: loaded meta data with 20 key-value pairs and 723 tensors from /workspace/spicyboros-70B-2.2.Q4_0.gguf (version GGUF V2 (latest))
llama_model_loader: - tensor 0: token_embd.weight q4_0 [ 8192, 32000, 1, 1 ]
llama_model_loader: - tensor 1: blk.0.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 2: blk.0.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 3: blk.0.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 4: blk.0.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 5: blk.0.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 6: blk.0.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 7: blk.0.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 8: blk.0.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 9: blk.0.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 10: blk.1.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 11: blk.1.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 12: blk.1.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 13: blk.1.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 14: blk.1.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 15: blk.1.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 16: blk.1.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 17: blk.1.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 18: blk.1.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 19: blk.2.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 20: blk.2.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 21: blk.2.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 22: blk.2.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 23: blk.2.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 24: blk.2.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 25: blk.2.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 26: blk.2.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 27: blk.2.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 28: blk.3.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 29: blk.3.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 30: blk.3.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 31: blk.3.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 32: blk.3.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 33: blk.3.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 34: blk.3.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 35: blk.3.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 36: blk.3.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 37: blk.4.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 38: blk.4.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 39: blk.4.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 40: blk.4.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 41: blk.4.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 42: blk.4.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 43: blk.4.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 44: blk.4.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 45: blk.4.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 46: blk.5.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 47: blk.5.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 48: blk.5.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 49: blk.5.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 50: blk.5.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 51: blk.5.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 52: blk.5.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 53: blk.5.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 54: blk.5.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 55: blk.6.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 56: blk.6.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 57: blk.6.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 58: blk.6.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 59: blk.6.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 60: blk.6.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 61: blk.6.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 62: blk.6.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 63: blk.6.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 64: blk.7.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 65: blk.7.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 66: blk.7.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 67: blk.7.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 68: blk.7.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 69: blk.7.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 70: blk.7.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 71: blk.7.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 72: blk.7.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 73: blk.8.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 74: blk.8.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 75: blk.8.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 76: blk.8.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 77: blk.8.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 78: blk.8.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 79: blk.8.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 80: blk.8.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 81: blk.8.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 82: blk.9.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 83: blk.9.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 84: blk.9.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 85: blk.9.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 86: blk.9.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 87: blk.9.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 88: blk.9.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 89: blk.9.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 90: blk.9.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 91: blk.10.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 92: blk.10.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 93: blk.10.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 94: blk.10.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 95: blk.10.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 96: blk.10.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 97: blk.10.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 98: blk.10.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 99: blk.10.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 100: blk.11.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 101: blk.11.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 102: blk.11.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 103: blk.11.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 104: blk.11.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 105: blk.11.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 106: blk.11.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 107: blk.11.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 108: blk.11.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 109: blk.12.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 110: blk.12.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 111: blk.12.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 112: blk.12.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 113: blk.12.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 114: blk.12.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 115: blk.12.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 116: blk.12.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 117: blk.12.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 118: blk.13.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 119: blk.13.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 120: blk.13.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 121: blk.13.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 122: blk.13.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 123: blk.13.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 124: blk.13.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 125: blk.13.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 126: blk.13.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 127: blk.14.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 128: blk.14.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 129: blk.14.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 130: blk.14.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 131: blk.14.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 132: blk.14.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 133: blk.14.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 134: blk.14.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 135: blk.14.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 136: blk.15.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 137: blk.15.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 138: blk.15.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 139: blk.15.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 140: blk.15.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 141: blk.15.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 142: blk.15.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 143: blk.15.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 144: blk.15.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 145: blk.16.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 146: blk.16.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 147: blk.16.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 148: blk.16.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 149: blk.16.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 150: blk.16.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 151: blk.16.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 152: blk.16.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 153: blk.16.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 154: blk.17.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 155: blk.17.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 156: blk.17.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 157: blk.17.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 158: blk.17.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 159: blk.17.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 160: blk.17.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 161: blk.17.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 162: blk.17.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 163: blk.18.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 164: blk.18.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 165: blk.18.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 166: blk.18.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 167: blk.18.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 168: blk.18.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 169: blk.18.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 170: blk.18.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 171: blk.18.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 172: blk.19.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 173: blk.19.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 174: blk.19.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 175: blk.19.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 176: blk.19.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 177: blk.19.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 178: blk.19.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 179: blk.19.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 180: blk.19.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 181: blk.20.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 182: blk.20.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 183: blk.20.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 184: blk.20.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 185: blk.20.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 186: blk.20.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 187: blk.20.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 188: blk.20.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 189: blk.20.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 190: blk.21.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 191: blk.21.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 192: blk.21.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 193: blk.21.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 194: blk.21.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 195: blk.21.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 196: blk.21.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 197: blk.21.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 198: blk.21.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 199: blk.22.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 200: blk.22.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 201: blk.22.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 202: blk.22.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 203: blk.22.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 204: blk.22.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 205: blk.22.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 206: blk.22.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 207: blk.22.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 208: blk.23.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 209: blk.23.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 210: blk.23.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 211: blk.23.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 212: blk.23.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 213: blk.23.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 214: blk.23.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 215: blk.23.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 216: blk.23.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 217: blk.24.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 218: blk.24.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 219: blk.24.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 220: blk.24.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 221: blk.24.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 222: blk.24.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 223: blk.24.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 224: blk.24.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 225: blk.24.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 226: blk.25.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 227: blk.25.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 228: blk.25.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 229: blk.25.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 230: blk.25.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 231: blk.25.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 232: blk.25.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 233: blk.25.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 234: blk.25.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 235: blk.26.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 236: blk.26.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 237: blk.26.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 238: blk.26.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 239: blk.26.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 240: blk.26.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 241: blk.26.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 242: blk.26.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 243: blk.26.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 244: blk.27.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 245: blk.27.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 246: blk.27.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 247: blk.27.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 248: blk.27.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 249: blk.27.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 250: blk.27.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 251: blk.27.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 252: blk.27.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 253: blk.28.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 254: blk.28.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 255: blk.28.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 256: blk.28.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 257: blk.28.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 258: blk.28.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 259: blk.28.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 260: blk.28.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 261: blk.28.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 262: blk.29.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 263: blk.29.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 264: blk.29.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 265: blk.29.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 266: blk.29.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 267: blk.29.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 268: blk.29.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 269: blk.29.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 270: blk.29.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 271: blk.30.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 272: blk.30.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 273: blk.30.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 274: blk.30.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 275: blk.30.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 276: blk.30.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 277: blk.30.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 278: blk.30.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 279: blk.30.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 280: blk.31.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 281: blk.31.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 282: blk.31.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 283: blk.31.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 284: blk.31.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 285: blk.31.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 286: blk.31.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 287: blk.31.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 288: blk.31.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 289: blk.32.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 290: blk.32.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 291: blk.32.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 292: blk.32.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 293: blk.32.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 294: blk.32.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 295: blk.32.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 296: blk.32.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 297: blk.32.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 298: blk.33.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 299: blk.33.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 300: blk.33.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 301: blk.33.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 302: blk.33.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 303: blk.33.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 304: blk.33.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 305: blk.33.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 306: blk.33.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 307: blk.34.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 308: blk.34.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 309: blk.34.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 310: blk.34.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 311: blk.34.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 312: blk.34.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 313: blk.34.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 314: blk.34.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 315: blk.34.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 316: blk.35.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 317: blk.35.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 318: blk.35.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 319: blk.35.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 320: blk.35.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 321: blk.35.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 322: blk.35.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 323: blk.35.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 324: blk.35.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 325: blk.36.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 326: blk.36.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 327: blk.36.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 328: blk.36.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 329: blk.36.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 330: blk.36.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 331: blk.36.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 332: blk.36.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 333: blk.36.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 334: blk.37.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 335: blk.37.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 336: blk.37.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 337: blk.37.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 338: blk.37.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 339: blk.37.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 340: blk.37.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 341: blk.37.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 342: blk.37.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 343: blk.38.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 344: blk.38.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 345: blk.38.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 346: blk.38.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 347: blk.38.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 348: blk.38.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 349: blk.38.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 350: blk.38.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 351: blk.38.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 352: blk.39.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 353: blk.39.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 354: blk.39.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 355: blk.39.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 356: blk.39.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 357: blk.39.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 358: blk.39.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 359: blk.39.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 360: blk.39.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 361: blk.40.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 362: blk.40.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 363: blk.40.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 364: blk.40.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 365: blk.40.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 366: blk.40.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 367: blk.40.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 368: blk.40.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 369: blk.40.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 370: blk.41.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 371: blk.41.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 372: blk.41.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 373: blk.41.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 374: blk.41.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 375: blk.41.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 376: blk.41.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 377: blk.41.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 378: blk.41.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 379: blk.42.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 380: blk.42.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 381: blk.42.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 382: blk.42.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 383: blk.42.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 384: blk.42.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 385: blk.42.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 386: blk.42.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 387: blk.42.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 388: blk.43.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 389: blk.43.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 390: blk.43.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 391: blk.43.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 392: blk.43.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 393: blk.43.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 394: blk.43.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 395: blk.43.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 396: blk.43.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 397: blk.44.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 398: blk.44.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 399: blk.44.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 400: blk.44.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 401: blk.44.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 402: blk.44.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 403: blk.44.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 404: blk.44.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 405: blk.44.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 406: blk.45.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 407: blk.45.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 408: blk.45.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 409: blk.45.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 410: blk.45.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 411: blk.45.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 412: blk.45.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 413: blk.45.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 414: blk.45.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 415: blk.46.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 416: blk.46.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 417: blk.46.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 418: blk.46.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 419: blk.46.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 420: blk.46.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 421: blk.46.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 422: blk.46.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 423: blk.46.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 424: blk.47.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 425: blk.47.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 426: blk.47.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 427: blk.47.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 428: blk.47.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 429: blk.47.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 430: blk.47.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 431: blk.47.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 432: blk.47.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 433: blk.48.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 434: blk.48.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 435: blk.48.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 436: blk.48.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 437: blk.48.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 438: blk.48.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 439: blk.48.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 440: blk.48.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 441: blk.48.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 442: blk.49.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 443: blk.49.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 444: blk.49.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 445: blk.49.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 446: blk.49.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 447: blk.49.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 448: blk.49.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 449: blk.49.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 450: blk.49.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 451: blk.50.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 452: blk.50.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 453: blk.50.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 454: blk.50.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 455: blk.50.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 456: blk.50.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 457: blk.50.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 458: blk.50.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 459: blk.50.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 460: blk.51.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 461: blk.51.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 462: blk.51.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 463: blk.51.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 464: blk.51.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 465: blk.51.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 466: blk.51.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 467: blk.51.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 468: blk.51.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 469: blk.52.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 470: blk.52.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 471: blk.52.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 472: blk.52.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 473: blk.52.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 474: blk.52.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 475: blk.52.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 476: blk.52.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 477: blk.52.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 478: blk.53.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 479: blk.53.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 480: blk.53.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 481: blk.53.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 482: blk.53.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 483: blk.53.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 484: blk.53.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 485: blk.53.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 486: blk.53.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 487: blk.54.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 488: blk.54.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 489: blk.54.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 490: blk.54.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 491: blk.54.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 492: blk.54.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 493: blk.54.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 494: blk.54.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 495: blk.54.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 496: blk.55.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 497: blk.55.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 498: blk.55.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 499: blk.55.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 500: blk.55.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 501: blk.55.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 502: blk.55.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 503: blk.55.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 504: blk.55.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 505: blk.56.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 506: blk.56.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 507: blk.56.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 508: blk.56.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 509: blk.56.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 510: blk.56.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 511: blk.56.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 512: blk.56.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 513: blk.56.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 514: blk.57.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 515: blk.57.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 516: blk.57.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 517: blk.57.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 518: blk.57.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 519: blk.57.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 520: blk.57.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 521: blk.57.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 522: blk.57.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 523: blk.58.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 524: blk.58.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 525: blk.58.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 526: blk.58.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 527: blk.58.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 528: blk.58.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 529: blk.58.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 530: blk.58.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 531: blk.58.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 532: blk.59.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 533: blk.59.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 534: blk.59.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 535: blk.59.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 536: blk.59.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 537: blk.59.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 538: blk.59.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 539: blk.59.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 540: blk.59.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 541: blk.60.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 542: blk.60.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 543: blk.60.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 544: blk.60.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 545: blk.60.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 546: blk.60.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 547: blk.60.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 548: blk.60.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 549: blk.60.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 550: blk.61.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 551: blk.61.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 552: blk.61.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 553: blk.61.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 554: blk.61.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 555: blk.61.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 556: blk.61.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 557: blk.61.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 558: blk.61.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 559: blk.62.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 560: blk.62.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 561: blk.62.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 562: blk.62.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 563: blk.62.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 564: blk.62.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 565: blk.62.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 566: blk.62.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 567: blk.62.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 568: blk.63.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 569: blk.63.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 570: blk.63.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 571: blk.63.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 572: blk.63.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 573: blk.63.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 574: blk.63.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 575: blk.63.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 576: blk.63.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 577: blk.64.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 578: blk.64.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 579: blk.64.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 580: blk.64.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 581: blk.64.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 582: blk.64.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 583: blk.64.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 584: blk.64.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 585: blk.64.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 586: blk.65.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 587: blk.65.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 588: blk.65.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 589: blk.65.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 590: blk.65.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 591: blk.65.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 592: blk.65.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 593: blk.65.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 594: blk.65.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 595: blk.66.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 596: blk.66.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 597: blk.66.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 598: blk.66.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 599: blk.66.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 600: blk.66.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 601: blk.66.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 602: blk.66.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 603: blk.66.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 604: blk.67.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 605: blk.67.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 606: blk.67.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 607: blk.67.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 608: blk.67.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 609: blk.67.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 610: blk.67.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 611: blk.67.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 612: blk.67.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 613: blk.68.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 614: blk.68.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 615: blk.68.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 616: blk.68.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 617: blk.68.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 618: blk.68.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 619: blk.68.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 620: blk.68.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 621: blk.68.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 622: blk.69.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 623: blk.69.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 624: blk.69.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 625: blk.69.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 626: blk.69.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 627: blk.69.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 628: blk.69.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 629: blk.69.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 630: blk.69.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 631: blk.70.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 632: blk.70.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 633: blk.70.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 634: blk.70.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 635: blk.70.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 636: blk.70.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 637: blk.70.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 638: blk.70.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 639: blk.70.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 640: blk.71.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 641: blk.71.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 642: blk.71.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 643: blk.71.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 644: blk.71.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 645: blk.71.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 646: blk.71.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 647: blk.71.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 648: blk.71.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 649: blk.72.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 650: blk.72.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 651: blk.72.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 652: blk.72.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 653: blk.72.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 654: blk.72.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 655: blk.72.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 656: blk.72.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 657: blk.72.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 658: blk.73.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 659: blk.73.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 660: blk.73.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 661: blk.73.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 662: blk.73.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 663: blk.73.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 664: blk.73.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 665: blk.73.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 666: blk.73.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 667: blk.74.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 668: blk.74.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 669: blk.74.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 670: blk.74.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 671: blk.74.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 672: blk.74.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 673: blk.74.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 674: blk.74.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 675: blk.74.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 676: blk.75.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 677: blk.75.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 678: blk.75.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 679: blk.75.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 680: blk.75.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 681: blk.75.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 682: blk.75.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 683: blk.75.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 684: blk.75.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 685: blk.76.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 686: blk.76.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 687: blk.76.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 688: blk.76.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 689: blk.76.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 690: blk.76.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ]
llama_model_loader: - tensor 691: blk.76.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ]
llama_model_loader: - tensor 692: blk.76.attn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 693: blk.76.ffn_norm.weight f32 [ 8192, 1, 1, 1 ]
llama_model_loader: - tensor 694: blk.77.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ]
llama_model_loader: - tensor 695: blk.77.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ]
llama_model_loader: - tensor 696: blk.77.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ] | |
llama_model_loader: - tensor 697: blk.77.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ] | |
llama_model_loader: - tensor 698: blk.77.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ] | |
llama_model_loader: - tensor 699: blk.77.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ] | |
llama_model_loader: - tensor 700: blk.77.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ] | |
llama_model_loader: - tensor 701: blk.77.attn_norm.weight f32 [ 8192, 1, 1, 1 ] | |
llama_model_loader: - tensor 702: blk.77.ffn_norm.weight f32 [ 8192, 1, 1, 1 ] | |
llama_model_loader: - tensor 703: blk.78.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ] | |
llama_model_loader: - tensor 704: blk.78.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ] | |
llama_model_loader: - tensor 705: blk.78.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ] | |
llama_model_loader: - tensor 706: blk.78.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ] | |
llama_model_loader: - tensor 707: blk.78.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ] | |
llama_model_loader: - tensor 708: blk.78.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ] | |
llama_model_loader: - tensor 709: blk.78.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ] | |
llama_model_loader: - tensor 710: blk.78.attn_norm.weight f32 [ 8192, 1, 1, 1 ] | |
llama_model_loader: - tensor 711: blk.78.ffn_norm.weight f32 [ 8192, 1, 1, 1 ] | |
llama_model_loader: - tensor 712: blk.79.attn_q.weight q4_0 [ 8192, 8192, 1, 1 ] | |
llama_model_loader: - tensor 713: blk.79.attn_k.weight q4_0 [ 8192, 1024, 1, 1 ] | |
llama_model_loader: - tensor 714: blk.79.attn_v.weight q4_0 [ 8192, 1024, 1, 1 ] | |
llama_model_loader: - tensor 715: blk.79.attn_output.weight q4_0 [ 8192, 8192, 1, 1 ] | |
llama_model_loader: - tensor 716: blk.79.ffn_gate.weight q4_0 [ 8192, 28672, 1, 1 ] | |
llama_model_loader: - tensor 717: blk.79.ffn_up.weight q4_0 [ 8192, 28672, 1, 1 ] | |
llama_model_loader: - tensor 718: blk.79.ffn_down.weight q4_0 [ 28672, 8192, 1, 1 ] | |
llama_model_loader: - tensor 719: blk.79.attn_norm.weight f32 [ 8192, 1, 1, 1 ] | |
llama_model_loader: - tensor 720: blk.79.ffn_norm.weight f32 [ 8192, 1, 1, 1 ] | |
llama_model_loader: - tensor 721: output_norm.weight f32 [ 8192, 1, 1, 1 ] | |
llama_model_loader: - tensor 722: output.weight q6_K [ 8192, 32000, 1, 1 ] | |
llama_model_loader: - kv 0: general.architecture str | |
llama_model_loader: - kv 1: general.name str | |
llama_model_loader: - kv 2: llama.context_length u32 | |
llama_model_loader: - kv 3: llama.embedding_length u32 | |
llama_model_loader: - kv 4: llama.block_count u32 | |
llama_model_loader: - kv 5: llama.feed_forward_length u32 | |
llama_model_loader: - kv 6: llama.rope.dimension_count u32 | |
llama_model_loader: - kv 7: llama.attention.head_count u32 | |
llama_model_loader: - kv 8: llama.attention.head_count_kv u32 | |
llama_model_loader: - kv 9: llama.attention.layer_norm_rms_epsilon f32 | |
llama_model_loader: - kv 10: llama.rope.freq_base f32 | |
llama_model_loader: - kv 11: general.file_type u32 | |
llama_model_loader: - kv 12: tokenizer.ggml.model str | |
llama_model_loader: - kv 13: tokenizer.ggml.tokens arr | |
llama_model_loader: - kv 14: tokenizer.ggml.scores arr | |
llama_model_loader: - kv 15: tokenizer.ggml.token_type arr | |
llama_model_loader: - kv 16: tokenizer.ggml.bos_token_id u32 | |
llama_model_loader: - kv 17: tokenizer.ggml.eos_token_id u32 | |
llama_model_loader: - kv 18: tokenizer.ggml.unknown_token_id u32 | |
llama_model_loader: - kv 19: general.quantization_version u32 | |
llama_model_loader: - type f32: 161 tensors | |
llama_model_loader: - type q4_0: 561 tensors | |
llama_model_loader: - type q6_K: 1 tensors | |
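
The loader's type summary can be cross-checked against the architecture: 80 transformer blocks times 9 tensors each (7 quantized matmul weights plus 2 f32 norms) gives 720, and token_embd, output_norm and output bring the total to the 723 tensors reported above. A minimal Python sketch of the same check, using the gguf package that ships with llama.cpp (pip install gguf); GGUFReader and the tensor_type field are assumptions about the gguf version installed:

    from collections import Counter
    from gguf import GGUFReader  # assumption: this reader class exists in your gguf install

    reader = GGUFReader("/workspace/spicyboros-70B-2.2.Q4_0.gguf")
    counts = Counter(t.tensor_type.name for t in reader.tensors)
    print(len(reader.tensors), dict(counts))  # expect 723 total: 161 F32, 561 Q4_0, 1 Q6_K

    # Same totals from the layout: per block 2 f32 norms and 7 q4_0 weights,
    # plus token_embd (q4_0), output_norm (f32) and output (q6_K).
    assert 80 * 2 + 1 == 161 and 80 * 7 + 1 == 561
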
llm_load_print_meta: format = GGUF V2 (latest) | |
llm_load_print_meta: arch = llama | |
llm_load_print_meta: vocab type = SPM | |
llm_load_print_meta: n_vocab = 32000 | |
llm_load_print_meta: n_merges = 0 | |
llm_load_print_meta: n_ctx_train = 4096 | |
llm_load_print_meta: n_ctx = 4096 | |
llm_load_print_meta: n_embd = 8192 | |
llm_load_print_meta: n_head = 64 | |
llm_load_print_meta: n_head_kv = 8 | |
llm_load_print_meta: n_layer = 80 | |
llm_load_print_meta: n_rot = 128 | |
llm_load_print_meta: n_gqa = 8 | |
llm_load_print_meta: f_norm_eps = 1.0e-05 | |
llm_load_print_meta: f_norm_rms_eps = 1.0e-05 | |
llm_load_print_meta: n_ff = 28672 | |
llm_load_print_meta: freq_base = 10000.0 | |
llm_load_print_meta: freq_scale = 1 | |
llm_load_print_meta: model type = 70B | |
llm_load_print_meta: model ftype = mostly Q4_0 | |
llm_load_print_meta: model size = 68.98 B | |
llm_load_print_meta: general.name = LLaMA v2 | |
llm_load_print_meta: BOS token = 1 '<s>' | |
llm_load_print_meta: EOS token = 2 '</s>' | |
llm_load_print_meta: UNK token = 0 '<unk>' | |
llm_load_print_meta: LF token = 13 '<0x0A>' | |
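
The GQA numbers printed here also explain the attn_k/attn_v shapes in the tensor list: head_dim = n_embd / n_head = 8192 / 64 = 128 (printed as n_rot), n_gqa = n_head / n_head_kv = 64 / 8 = 8, and the K/V projections emit n_head_kv * head_dim = 8 * 128 = 1024 features, hence the [ 8192, 1024 ] weights above. As arithmetic only:

    n_embd, n_head, n_head_kv = 8192, 64, 8
    head_dim = n_embd // n_head      # 128, printed as n_rot
    n_gqa = n_head // n_head_kv      # 8, printed as n_gqa
    kv_out = n_head_kv * head_dim    # 1024, the second dim of every attn_k/attn_v weight
    print(head_dim, n_gqa, kv_out)
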
llm_load_tensors: ggml ctx size = 0.23 MB | |
llm_load_tensors: using CUDA for GPU acceleration | |
llm_load_tensors: mem required = 37070.97 MB (+ 1280.00 MB per state) | |
llm_load_tensors: offloading 0 repeating layers to GPU | |
llm_load_tensors: offloaded 0/83 layers to GPU | |
llm_load_tensors: VRAM used: 0 MB | |
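
Note that no layers were offloaded (0/83), so the whole model sits in host RAM and the A10's 24 GB of VRAM goes unused; passing -ngl with a layer count would move layers onto the GPU. The ~37 GB figure is consistent with Q4_0's packing of 32 weights into 18 bytes (16 bytes of 4-bit quants plus one f16 scale), i.e. 4.5 bits per weight. A rough check, treating all 68.98 B parameters as Q4_0:

    n_params = 68.98e9               # "model size = 68.98 B" above
    q4_0_bytes_per_weight = 18 / 32  # 4.5 bits per weight
    print(n_params * q4_0_bytes_per_weight / 2**20)  # ~37,003 MiB vs 37070.97 MB reported
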
.................................................................................................... | |
llama_new_context_with_model: kv self size = 1280.00 MB | |
llama_new_context_with_model: compute buffer total size = 561.47 MB | |
llama_new_context_with_model: VRAM scratch buffer: 560.00 MB | |
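
The 1280.00 MB KV cache is exactly the f16 K and V tensors for the full context: 2 * n_layer * n_ctx * n_head_kv * head_dim * 2 bytes = 2 * 80 * 4096 * 8 * 128 * 2 = 1,342,177,280 bytes = 1280 MiB, which also matches the "+ 1280.00 MB per state" reported by llm_load_tensors above:

    n_layer, n_ctx, n_head_kv, head_dim = 80, 4096, 8, 128
    kv_bytes = 2 * n_layer * n_ctx * n_head_kv * head_dim * 2  # K and V, f16 = 2 bytes
    print(kv_bytes / 2**20)  # 1280.0
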
system_info: n_threads = 15 / 30 | AVX = 1 | AVX2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | | |
sampling: repeat_last_n = 64, repeat_penalty = 1.100000, presence_penalty = 0.000000, frequency_penalty = 0.000000, top_k = 40, tfs_z = 1.000000, top_p = 0.950000, typical_p = 1.000000, temp = 0.800000, mirostat = 0, mirostat_lr = 0.100000, mirostat_ent = 5.000000 | |
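
With mirostat = 0, tfs_z = 1.0, and typical_p = 1.0 (both no-ops at 1.0), the stages that matter on this run are the repeat penalty, then top_k, top_p, and temperature. A minimal numpy sketch of those last three stages, schematic rather than llama.cpp's actual sampler (the repeat-penalty stage is omitted, and the stage order is an assumption about this build):

    import numpy as np

    def sample_token(logits, temp=0.8, top_k=40, top_p=0.95, rng=np.random.default_rng()):
        order = np.argsort(logits)[::-1][:top_k]        # top_k: keep the 40 best logits
        p = np.exp(logits[order] - logits[order].max())
        p /= p.sum()
        cut = np.searchsorted(np.cumsum(p), top_p) + 1  # top_p: smallest prefix with mass >= 0.95
        order, p = order[:cut], p[:cut]
        p = p ** (1.0 / temp)                           # temperature reshapes the kept mass
        return order[rng.choice(len(order), p=p / p.sum())]
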
generate: n_ctx = 4096, n_batch = 512, n_predict = 128, n_keep = 0 | |
A chat.\nUSER: Write a story about llamas\nASSISTANT:oid◄◄letteakoÝbrieпіbrieroberiaÝiomcych Insertomengenommen prolong Feder Sebbrie◄ fifigliaÝ Matthoauthandro◄loyee◄ obser cabarfgresloyeeigliaMITgenommen◄тистиbrie stat◄ |