-
-
Save ochafik/595dbe2dc61061691c2762cb2e4d1c14 to your computer and use it in GitHub Desktop.
GGUF template checker against original model
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| ''' | |
| curl -r 0-10000000 -L https://huggingface.co/unsloth/Qwen2.5-Coder-32B-Instruct-128K-GGUF/resolve/main/Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf -o frag.gguf | |
| PYTHONPATH=gguf-py/ python -m examples.check_template frag.gguf | |
| PYTHONPATH=gguf-py/ python -m examples.check_gguf unsloth/Qwen2.5-Coder-32B-Instruct-128K-GGUF | |
| ''' | |
| from contextlib import contextmanager | |
| from dataclasses import dataclass | |
| import logging | |
| from os import PathLike | |
| import os | |
| import sys | |
| from pathlib import Path | |
| import tempfile | |
| import traceback | |
| from typing import Any, Dict, Generator, Optional | |
| import requests | |
| logger = logging.getLogger("reader") | |
| # Necessary to load the local gguf package and the scripts | |
| sys.path.insert(0, str(Path(__file__).parent.parent)) | |
| sys.path.insert(0, str(Path(__file__).parent.parent.parent / "scripts")) | |
| from gguf.constants import GGUFValueType, Keys | |
| from gguf.gguf_reader import GGUFReader | |
| from get_chat_template import get_chat_template | |
class TensorFreeGGUFReader(GGUFReader):
    """GGUFReader variant whose tensor-building step does nothing.

    Only ``_build_tensors`` is overridden; every other behaviour comes from
    ``GGUFReader`` unchanged.
    NOTE(review): presumably this is what allows a truncated file (metadata
    only, tensor data missing) to be opened — confirm against GGUFReader.
    """

    def _build_tensors(self, start_offs, fields):
        # Intentionally a no-op: both arguments are ignored.
        return None
def read_field(reader, name):
    """Decode the metadata field *name* of *reader* into plain Python values.

    Returns:
        ``None`` when the field declares no types; a list of ``str`` for a
        string array; a flat list of values for any other array; a ``str``
        for a string scalar; otherwise the first element of the field's last
        part.

    Raises:
        KeyError: if *name* is not in ``reader.fields``.
        AssertionError: if a scalar field's type is not listed in
            ``reader.gguf_scalar_to_np``.
    """
    entry = reader.fields[name]
    kinds = entry.types
    if not kinds:
        return None

    def as_text(part):
        # Parts hold raw bytes; strings are stored UTF-8 encoded.
        return str(bytes(part), encoding="utf-8")

    if kinds[:1] == [GGUFValueType.ARRAY]:
        # The last declared type is the element type of the array.
        if kinds[-1] == GGUFValueType.STRING:
            return [as_text(entry.parts[i]) for i in entry.data]
        flattened = []
        for i in entry.data:
            flattened.extend(entry.parts[i].tolist())
        return flattened

    if kinds[0] == GGUFValueType.STRING:
        return as_text(entry.parts[-1])

    assert(kinds[0] in reader.gguf_scalar_to_np)
    return entry.parts[-1].tolist()[0]
# GGUF metadata as plain Python values, keyed by field name.
KVs = Dict[str, Any]
# Chat templates keyed by template name (e.g. "default") -> template source.
Templates = Dict[str, str]
@dataclass
class HuggingFaceModel:
    """A Hugging Face repository reference, optionally pinned to one file."""

    # Repository owner: the part before the '/'.
    organization: str
    # Repository name: the part after the '/' (without any ':quant' suffix).
    name: str
    # File name inside the repository, or None when none could be derived.
    file: Optional[str]

    @staticmethod
    def parse(s: str) -> 'HuggingFaceModel':
        """Parse ``<organization>/<name>[:<quant>]`` into a model reference.

        For a name ending in ``-GGUF`` the file name is derived by replacing
        that suffix with ``-<quant>.gguf`` (quant defaults to ``Q4_K_M``).
        For any other name, ``file`` is None.

        Raises:
            ValueError: if *s* is not of the expected shape, or if a quant is
                given for a repository whose name does not end in ``-GGUF``.
        """
        suffix = "-GGUF"
        pieces = s.split('/')
        if len(pieces) != 2:
            raise ValueError(f"Expected '<organization>/<name>[:<quant>]', got {s!r}")
        org, name = pieces

        if ':' in name:
            name_and_quant = name.split(':')
            if len(name_and_quant) != 2:
                raise ValueError(f"Expected at most one ':' in model name, got {s!r}")
            name, quant = name_and_quant
            # Explicit raise instead of assert: asserts are stripped under -O.
            if not name.endswith(suffix):
                raise ValueError(
                    f"A quant can only be given for a '{suffix}' repository, got {s!r}")
            return HuggingFaceModel(org, name, f"{name[:-len(suffix)]}-{quant}.gguf")

        if name.endswith(suffix):
            return HuggingFaceModel(org, name, f"{name[:-len(suffix)]}-Q4_K_M.gguf")

        return HuggingFaceModel(org, name, None)

    @property
    def url(self) -> str:
        """Download URL of ``file`` on the repository's ``main`` revision.

        Raises:
            ValueError: if no file is set; otherwise the URL would silently
                end in ``/None``.
        """
        if self.file is None:
            raise ValueError(
                f"No file known for {self.organization}/{self.name}; cannot build a URL")
        return f"https://huggingface.co/{self.organization}/{self.name}/resolve/main/{self.file}"
def find_cached_gguf(model: HuggingFaceModel) -> Optional[Path]:
    """Return the path of a locally cached GGUF for *model*, if any.

    Placeholder: no lookup is implemented yet, so *model* is ignored and the
    result is always None.
    """
    cached: Optional[Path] = None
    return cached
def fetch_n_first_bytes(url: str, n: int, timeout: float = 60.0) -> bytes:
    """Download at most the first *n* bytes of *url*.

    Args:
        url: Resource to fetch; redirects are followed.
        n: Maximum number of bytes to return.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Up to *n* bytes (fewer if the resource is shorter); ``b""`` for n == 0.

    Raises:
        ValueError: if *n* is negative.
        requests.HTTPError: if the server answers with an error status.
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")
    if n == 0:
        return b""

    # HTTP byte ranges are inclusive on both ends, so the first n bytes are 0..n-1.
    headers = {"Range": f"bytes=0-{n - 1}"}
    # Stream the body: a server that ignores the Range header would otherwise
    # make us buffer the complete (potentially multi-GB) file in memory.
    with requests.get(url, headers=headers, allow_redirects=True,
                      stream=True, timeout=timeout) as response:
        response.raise_for_status()
        received = bytearray()
        for chunk in response.iter_content(chunk_size=1 << 16):
            received += chunk
            if len(received) >= n:
                break
    return bytes(received[:n])
def get_local_gguf_kvs(gguf_path: PathLike) -> KVs:
    """Read every metadata field of the GGUF file at *gguf_path*.

    The file is opened with ``TensorFreeGGUFReader`` and each field is decoded
    with ``read_field``; the result maps field name to decoded value.
    """
    gguf = TensorFreeGGUFReader(gguf_path)
    kvs: KVs = {}
    for field_name in gguf.fields.keys():
        kvs[field_name] = read_field(gguf, field_name)
    return kvs
def get_hf_gguf_kvs(model: HuggingFaceModel) -> KVs:
    """Read the GGUF metadata of *model*, downloading only the file's head.

    A locally cached file is used when ``find_cached_gguf`` returns one.
    Otherwise the first bytes of ``model.url`` are downloaded to a temporary
    file, parsed, and the temporary file is removed again.
    """
    cached_file = find_cached_gguf(model)
    if cached_file:
        return get_local_gguf_kvs(cached_file)

    # TODO: retry w/ increasingly larger n
    n = 10000000
    # Download first, so a failed request never leaves a temp file behind.
    head = fetch_n_first_bytes(model.url, n)

    # delete=False so the file can be reopened by name after it is closed
    # (an open NamedTemporaryFile cannot be reopened on every platform).
    temp_file = tempfile.NamedTemporaryFile(delete=False)
    temp_path = Path(temp_file.name)
    try:
        # Close (and thereby flush) before parsing: reading while the writer
        # is still open may observe a partially written file.
        with temp_file:
            temp_file.write(head)
        return get_local_gguf_kvs(temp_path)
    finally:
        try:
            temp_path.unlink()
        except OSError as e:
            # Best-effort cleanup: removal can fail if the file is still in
            # use, and that must not mask the real result or error.
            logger.warning("Could not remove temporary file %s: %s", temp_path, e)
def get_templates_from_hf(model: HuggingFaceModel) -> Templates:
    """Fetch the reference chat template for *model*.

    Prints a progress line, then asks ``get_chat_template`` for the
    ``<organization>/<name>`` repository and returns the result under the
    ``"default"`` key.
    """
    print(f'Reading template from {model}')
    repo_id = f"{model.organization}/{model.name}"
    default_template = get_chat_template(repo_id)
    return {"default": default_template}
def get_templates_from_kvs(kvs: KVs) -> Templates:
    """Collect the chat templates stored in GGUF metadata *kvs*.

    Returns a fresh dict (the input is never mutated) mapping template name to
    template text. The single ``CHAT_TEMPLATE`` value, when present, is stored
    under ``"default"`` and takes precedence over a named "default" template.
    """
    out: Templates = {}

    named = kvs.get(Keys.Tokenizer.CHAT_TEMPLATES)
    if isinstance(named, dict):
        # Already a name -> template mapping: copy rather than alias it, so
        # adding 'default' below does not modify the caller's kvs.
        out.update(named)
    elif isinstance(named, (list, tuple)):
        # NOTE(review): gguf-py's writer appears to store only the template
        # *names* here, with each text under CHAT_TEMPLATE_N — confirm.
        for template_name in named:
            template = kvs.get(Keys.Tokenizer.CHAT_TEMPLATE_N.format(name=template_name))
            if isinstance(template, str):
                out[template_name] = template

    if Keys.Tokenizer.CHAT_TEMPLATE in kvs:
        out['default'] = kvs[Keys.Tokenizer.CHAT_TEMPLATE]
    return out
def infer_hf_model_from_kvs(kvs: Dict[str, Any]) -> HuggingFaceModel:
    """Guess the original Hugging Face repository from GGUF metadata.

    The organization and name KVs are used, with spaces replaced by dashes.
    When the organization is missing and the tokenizer pre-processor is
    ``deepseek-r1-qwen``, the organization falls back to ``deepseek-ai``.

    Returns:
        A model reference with ``file`` set to None.

    Raises:
        ValueError: if the organization or name cannot be determined.
    """
    organization = kvs.get(Keys.General.ORGANIZATION)
    if not organization and kvs.get(Keys.Tokenizer.PRE) == 'deepseek-r1-qwen':
        organization = "deepseek-ai"
    # Explicit raises instead of asserts: asserts are stripped under -O.
    if not isinstance(organization, str):
        raise ValueError(f'Expected {Keys.General.ORGANIZATION} KV, got {organization}')

    name = kvs.get(Keys.General.NAME)
    if not isinstance(name, str):
        raise ValueError(f'Expected {Keys.General.NAME} KV, got {name}')

    # HuggingFaceModel has no `quant` field; its third (required) field is `file`.
    return HuggingFaceModel(
        organization=organization.replace(' ', '-'),
        name=name.replace(' ', '-'),
        file=None)
def compare_templates(context: str, kvs: KVs, expected: Templates, actual: Templates) -> None:
    """Check that the default template in *actual* equals the one in *expected*.

    Args:
        context: Label prefixed to the printed / raised message.
        kvs: Unused; accepted so the signature stays stable for callers.
        expected: Reference templates; must contain ``'default'``.
        actual: Templates under test; must contain ``'default'``.

    Raises:
        KeyError: if either mapping has no ``'default'`` entry.
        AssertionError: if the two default templates differ.
    """
    default_expected = expected['default']
    default_actual = actual['default']
    # Raise explicitly rather than via `assert`: under `python -O` an assert
    # is removed and every comparison would be reported as a match.
    if default_actual != default_expected:
        raise AssertionError(
            f'[{context}] ❌ Mismatch of default template:\n<template name="expected">\n{default_expected}\n</template>\n<template name="actual">\n{default_actual}\n</template>')
    print(f"[{context}] ✅ Templates match")
def check_gguf(context: str, kvs: KVs) -> None:
    """Compare the templates in *kvs* against the inferred reference model.

    The templates found in *kvs* are compared with those fetched for the
    Hugging Face repository inferred from the same *kvs*. On any failure a
    summary of *kvs* is printed and the exception is re-raised.
    """
    try:
        found = get_templates_from_kvs(kvs)
        reference_repo = infer_hf_model_from_kvs(kvs)
        reference = get_templates_from_hf(reference_repo)
        compare_templates(context, kvs, reference, found)
    except Exception as e:
        # Dump the metadata to help diagnose the failure: strings are cut to
        # 100 characters, numbers shown in full, anything else by key only.
        for key, value in kvs.items():
            if isinstance(value, str):
                line = f'{key}: {value[0:100]}'
            elif isinstance(value, (int, float)):
                line = f'{key}: {value}'
            else:
                line = key
            print(line)
        raise e
# Per-user home directory. Path.home() also works where $HOME is not set
# (typical on Windows), where os.environ['HOME'] would raise at import time.
HOME = Path.home()

# Platform-specific per-user cache root.
if sys.platform == 'darwin':
    CACHE = HOME / "Library" / "Caches"
elif os.name == 'nt':
    # %LOCALAPPDATA% is the per-user cache location on Windows.
    CACHE = Path(os.environ.get('LOCALAPPDATA') or HOME / "AppData" / "Local")
else:
    # Honour the XDG base-directory convention, defaulting to ~/.cache.
    CACHE = Path(os.environ.get('XDG_CACHE_HOME') or HOME / ".cache")

# Directory scanned for *.gguf files; the LLAMA_CACHE environment variable
# overrides the platform default.
LLAMA_CACHE = Path(os.environ.get('LLAMA_CACHE', (CACHE / "llama.cpp").as_posix()))
if __name__ == '__main__':
    # Each argument is either a path to a local GGUF file or a Hugging Face
    # reference of the form <organization>/<name>[:<quant>]. Without
    # arguments, every *.gguf file in LLAMA_CACHE is checked.
    args = sys.argv[1:]
    if not args:
        # Sorted so the output order is reproducible between runs.
        args = [p.as_posix() for p in sorted(LLAMA_CACHE.glob('*.gguf'))]

    failures = 0
    for arg in args:
        print(f"# {arg}")
        try:
            if os.path.exists(arg):
                kvs = get_local_gguf_kvs(Path(arg))
            else:
                kvs = get_hf_gguf_kvs(HuggingFaceModel.parse(arg))
            check_gguf(arg, kvs)
        except Exception as e:
            # Top-level boundary: report the failure and continue with the
            # remaining arguments.
            failures += 1
            print(f"! {arg}: {e}")
            traceback.print_exc()

    # Signal failure to the calling shell / CI instead of always exiting 0.
    if failures:
        sys.exit(1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment