llama.cpp (fbe7dfa53caff0a7e830b676e6e949917a5c71b4) patch for miqu
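The patch below adds a --to-hf flag to gguf-py/scripts/gguf-dump.py: instead of dumping metadata, the script maps each GGUF tensor name back to its HuggingFace-style equivalent, reverses the dimension order (GGUF stores shapes back-to-front relative to torch), undoes the Q/K head permutation that llama.cpp's convert.py applies, and saves the resulting fp16 state dict to hf_pm2.pt.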
diff --git a/gguf-py/scripts/gguf-dump.py b/gguf-py/scripts/gguf-dump.py
index dbf89150..eeea947f 100755
--- a/gguf-py/scripts/gguf-dump.py
+++ b/gguf-py/scripts/gguf-dump.py
@@ -24,6 +24,56 @@ def get_file_host_endian(reader: GGUFReader) -> tuple[str, str]:
     file_endian = host_endian
     return (host_endian, file_endian)
+from tqdm import tqdm
+import gguf.tensor_mapping as tm
+import re
+import torch
+LAYERS = 80  # llama-2-70b / miqu depth: blk.0 .. blk.79
+INT_REGEX = re.compile(r'\.[0-9]+\.')
+def get_layer_idx(k: str): return int(INT_REGEX.search(k).group()[1:-1])
+def possibly_fix_key(k: str):
+    '''
+    [ 8192 32000] output.weight             -> lm_head.linear
+    [ 8192 32000] token_embd.weight         -> transformer.embd.wte
+    [8192]        output_norm.weight        -> lm_head.ln
+    [8192]        blk.79.attn_norm.weight   -> model.layers.layers.79.norm
+    [8192]        blk.79.ffn_norm.weight    -> h.79.ln_2
+    [28672  8192] blk.79.ffn_down.weight    -> model.layers.layers.79.mlp.down_proj
+    [ 8192 28672] blk.79.ffn_gate.weight    -> model.layers.layers.79.mlp.gate_proj
+    [ 8192 28672] blk.79.ffn_up.weight      -> model.layers.layers.79.mlp.up_proj
+    [ 8192  1024] blk.79.attn_k.weight      -> model.layers.layers.79.self_attn.k_proj
+    [ 8192  8192] blk.79.attn_output.weight -> model.layers.layers.79.self_attn.o_proj
+    [ 8192  8192] blk.79.attn_q.weight      -> model.layers.layers.79.self_attn.q_proj
+    [ 8192  1024] blk.79.attn_v.weight      -> model.layers.layers.79.self_attn.v_proj
+    '''
+    if k[0] == 'h' and k[-4:] == 'ln_2':  # blk.79.ffn_norm.weight -> h.79.ln_2
+        k = f'model.layers.{get_layer_idx(k)}.post_attention_layernorm'
+    elif k[-4:] == 'norm':
+        k = f'model.layers.{get_layer_idx(k)}.input_layernorm'
+
+    D = {'lm_head.linear': 'lm_head', 'transformer.embd.wte': 'model.embed_tokens', 'lm_head.ln': 'model.norm'}
+    if k in D: k = D[k]
+
+    k = k.replace('layers.layers', 'layers')
+    k += '.weight'
+
+    return k
+# see https://github.com/ggerganov/llama.cpp/blob/master/convert.py#L1182
+def possibly_permute(t: torch.Tensor, k: str):
+    if 'q_proj' in k or 'k_proj' in k:
+        H = 64 if 'q_proj' in k else 8  # 64 query heads, 8 KV heads (GQA)
+        return t.reshape(H, t.shape[0] // H // 2, 2, *t.shape[1:]).swapaxes(1,2).reshape(t.shape)
+    return t
+def convert_to_hf(r: GGUFReader):
+    d = {v[1]: k for k, v in tm.get_tensor_name_map(tm.MODEL_ARCH.LLAMA, LAYERS).mapping.items()}  # gguf name -> source-model name
+    for rt in r.tensors: print(rt.shape, '\t', rt.name, ' -> ', d[rt.name[:-7]])
+    assert all(rt.data.dtype == np.float32 or rt.data.dtype == np.float16 for rt in r.tensors)
+    sd = {
+        possibly_fix_key(d[rt.name[:-7]]): torch.from_numpy(rt.data).half().view(*reversed(rt.shape.tolist()))
+        for rt in tqdm(r.tensors)
+    }
+    sd = {k: possibly_permute(v, k) for k, v in tqdm(sd.items())}
+    return sd
 # For more information about what field.parts and field.data represent,
 # please see the comments in the modify_gguf.py example.
@@ -103,10 +153,13 @@ def main() -> None:
     parser.add_argument("--no-tensors", action="store_true", help="Don't dump tensor metadata")
     parser.add_argument("--json", action="store_true", help="Produce JSON output")
     parser.add_argument("--json-array", action="store_true", help="Include full array values in JSON output (long)")
+    parser.add_argument("--to-hf", action="store_true", help="Convert tensors to a HuggingFace state dict (saved as hf_pm2.pt)")
     args = parser.parse_args(None if len(sys.argv) > 1 else ["--help"])
     if not args.json:
         print(f'* Loading: {args.model}')
     reader = GGUFReader(args.model, 'r')
+    if args.to_hf:
+        return convert_to_hf(reader)
     if args.json:
         dump_metadata_json(reader, args)
     else:
@@ -114,4 +167,5 @@ def main() -> None:
 if __name__ == '__main__':
-    main()
+    c = main()
+    torch.save(c, "hf_pm2.pt")