Skip to content

Instantly share code, notes, and snippets.

@ochafik
Last active March 7, 2025 17:32
Show Gist options
  • Select an option

  • Save ochafik/595dbe2dc61061691c2762cb2e4d1c14 to your computer and use it in GitHub Desktop.

Select an option

Save ochafik/595dbe2dc61061691c2762cb2e4d1c14 to your computer and use it in GitHub Desktop.
GGUF template checker against original model
#!/usr/bin/env python3
'''
curl -r 0-10000000 -L https://huggingface.co/unsloth/Qwen2.5-Coder-32B-Instruct-128K-GGUF/resolve/main/Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf -o frag.gguf
PYTHONPATH=gguf-py/ python -m examples.check_template frag.gguf
PYTHONPATH=gguf-py/ python -m examples.check_gguf unsloth/Qwen2.5-Coder-32B-Instruct-128K-GGUF
'''
from contextlib import contextmanager
from dataclasses import dataclass
import logging
from os import PathLike
import os
import sys
from pathlib import Path
import tempfile
import traceback
from typing import Any, Dict, Generator, Optional
import requests
logger = logging.getLogger("reader")
# Necessary to load the local gguf package and the scripts
sys.path.insert(0, str(Path(__file__).parent.parent))
sys.path.insert(0, str(Path(__file__).parent.parent.parent / "scripts"))
from gguf.constants import GGUFValueType, Keys
from gguf.gguf_reader import GGUFReader
from get_chat_template import get_chat_template
class TensorFreeGGUFReader(GGUFReader):
    """GGUFReader variant that skips building tensors, so that a partial
    download (just the header/KV section of a .gguf file) can be parsed
    for its metadata without the tensor data being present."""

    def _build_tensors(self, start_offs, fields):
        # Intentionally a no-op: only a fragment of the file may be
        # available, so tensor data cannot be materialized.
        pass
def read_field(reader, name):
    """Decode the GGUF KV field `name` from `reader` into plain Python values.

    Returns None for an empty field, a list for ARRAY fields, a str for
    STRING fields, and a scalar otherwise.
    """
    field = reader.fields[name]
    if not field.types:
        return None
    if field.types[:1] == [GGUFValueType.ARRAY]:
        element_type = field.types[-1]
        if element_type == GGUFValueType.STRING:
            return [bytes(field.parts[i]).decode("utf-8") for i in field.data]
        return [value for i in field.data for value in field.parts[i].tolist()]
    if field.types[0] == GGUFValueType.STRING:
        return bytes(field.parts[-1]).decode("utf-8")
    assert field.types[0] in reader.gguf_scalar_to_np
    return field.parts[-1].tolist()[0]
# Mapping of template name (e.g. "default") to Jinja template source.
Templates = Dict[str, str]
# Decoded GGUF key/value metadata, keyed by the GGUF KV name.
KVs = Dict[str, Any]
@dataclass
class HuggingFaceModel:
    """A Hugging Face repo reference, optionally pointing at one GGUF file."""
    organization: str
    name: str
    file: Optional[str]

    @staticmethod
    def parse(s: str) -> 'HuggingFaceModel':
        """Parse "org/name", "org/name-GGUF" or "org/name-GGUF:QUANT".

        For a -GGUF repo with no explicit quant, Q4_K_M is assumed.
        """
        org, name = s.split('/')
        if ':' in name:
            name, quant = name.split(':')
            assert name.endswith('-GGUF')
            base = name[0:-len("-GGUF")]
            return HuggingFaceModel(org, name, base + "-" + quant + ".gguf")
        if name.endswith('-GGUF'):
            base = name[0:-len("-GGUF")]
            return HuggingFaceModel(org, name, base + "-Q4_K_M.gguf")
        return HuggingFaceModel(org, name, None)

    @property
    def url(self) -> str:
        """Direct download URL for `file` on huggingface.co."""
        return f"https://huggingface.co/{self.organization}/{self.name}/resolve/main/{self.file}"
def find_cached_gguf(model: HuggingFaceModel) -> Optional[Path]:
    """Look up a locally cached GGUF for `model`. Not implemented yet:
    always returns None, forcing a (partial) download."""
    return None
def fetch_n_first_bytes(url: str, n: int) -> bytes:
    """Fetch the first `n` bytes of `url` via an HTTP Range request.

    Raises requests.HTTPError for non-success responses. Servers that
    ignore Range may still return the whole body.
    """
    # HTTP Range is inclusive on both ends, so bytes=0-(n-1) yields exactly
    # n bytes (the original bytes=0-{n} fetched n+1).
    response = requests.get(url, headers={"Range": f"bytes=0-{max(n - 1, 0)}"}, allow_redirects=True)
    response.raise_for_status()
    return response.content
def get_local_gguf_kvs(gguf_path: PathLike) -> KVs:
    """Read every KV metadata field from a GGUF file on disk.

    Tensor data is skipped entirely, so a truncated file fragment that
    contains the header/KV section is sufficient.
    """
    reader = TensorFreeGGUFReader(gguf_path)
    return {name: read_field(reader, name) for name in reader.fields}
def get_hf_gguf_kvs(model: HuggingFaceModel) -> KVs:
    """Fetch KV metadata for a Hugging Face-hosted GGUF.

    Prefers a locally cached copy; otherwise downloads only the first bytes
    of the file (enough for the header/KV section) into a temporary file,
    which is removed once parsed.
    """
    cached_file = find_cached_gguf(model)
    if cached_file:
        return get_local_gguf_kvs(cached_file)
    # TODO: retry w/ increasingly larger n
    n = 10000000
    # delete=False so the file can be re-opened by name (required on
    # Windows, where an open NamedTemporaryFile cannot be opened again).
    # Closing the `with` block before parsing also guarantees the written
    # bytes are flushed to disk; the original parsed while still open.
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        temp_file.write(fetch_n_first_bytes(model.url, n))
    try:
        return get_local_gguf_kvs(Path(temp_file.name))
    finally:
        # The original leaked one temp file per call; clean up explicitly.
        os.unlink(temp_file.name)
def get_templates_from_hf(model: HuggingFaceModel) -> Templates:
    """Fetch the reference chat template from the original (non-GGUF) HF repo."""
    print(f'Reading template from {model}')
    repo_id = f"{model.organization}/{model.name}"
    return {"default": get_chat_template(repo_id)}
def get_templates_from_kvs(kvs: KVs) -> Templates:
    """Extract the chat template(s) embedded in GGUF KV metadata."""
    # Copy before mutating: the original aliased the object stored in kvs,
    # so assigning 'default' below clobbered the caller's metadata.
    # NOTE(review): assumes CHAT_TEMPLATES, when present, is a mapping —
    # TODO confirm against the GGUF spec / writer side.
    out = dict(kvs.get(Keys.Tokenizer.CHAT_TEMPLATES) or {})
    if Keys.Tokenizer.CHAT_TEMPLATE in kvs:
        out['default'] = kvs[Keys.Tokenizer.CHAT_TEMPLATE]
    # TODO: handle Keys.Tokenizer.CHAT_TEMPLATE_N
    return out
def infer_hf_model_from_kvs(kvs: Dict[str, Any]) -> HuggingFaceModel:
    """Infer the original (non-GGUF) HF repo from a GGUF's KV metadata.

    Raises AssertionError when the organization or model name KV is
    missing or not a string.
    """
    organization = kvs.get(Keys.General.ORGANIZATION)
    # Some conversions omit the organization KV; recover it from the
    # pre-tokenizer identifier for known cases.
    if not organization and kvs.get(Keys.Tokenizer.PRE) == 'deepseek-r1-qwen':
        organization = "deepseek-ai"
    assert isinstance(organization, str), f'Expected {Keys.General.ORGANIZATION} KV, got {organization}'
    name = kvs.get(Keys.General.NAME)
    assert isinstance(name, str), f'Expected {Keys.General.NAME} KV, got {name}'
    return HuggingFaceModel(
        organization=organization.replace(' ', '-'),
        name=name.replace(' ', '-'),
        # BUG FIX: HuggingFaceModel has a `file` field, not `quant`;
        # the original `quant=None` raised TypeError at runtime.
        file=None)
def compare_templates(context: str, kvs: KVs, expected: Templates, actual: Templates) -> None:
    """Verify that the 'default' templates match, raising AssertionError with
    both templates rendered when they differ.

    Raising explicitly (rather than via `assert`) keeps the check — the whole
    point of this tool — active even under `python -O`.
    """
    default_expected = expected['default']
    default_actual = actual['default']
    if default_actual != default_expected:
        raise AssertionError(f'[{context}] ❌ Mismatch of default template:\n<template name="expected">\n{default_expected}\n</template>\n<template name="actual">\n{default_actual}\n</template>')
    print(f"[{context}] ✅ Templates match")
def check_gguf(context: str, kvs: KVs) -> None:
    """Compare a GGUF's embedded chat templates against the original HF repo.

    On any failure, dumps the KVs (string values truncated to 100 chars,
    non-scalar values as key only) to aid debugging, then re-raises.
    """
    try:
        actual_templates = get_templates_from_kvs(kvs)
        reference_hf_repo = infer_hf_model_from_kvs(kvs)
        expected_templates = get_templates_from_hf(reference_hf_repo)
        compare_templates(context, kvs, expected_templates, actual_templates)
    except Exception:
        for k, v in kvs.items():
            if isinstance(v, str):
                print(f'{k}: {v[0:100]}')
            elif isinstance(v, (int, float)):
                print(f'{k}: {v}')
            else:
                print(k)
        # Bare raise preserves the original traceback (the original
        # `raise e` re-raised from the handler's frame).
        raise
# Per-platform user cache directory, overridable via $LLAMA_CACHE.
# Path.home() replaces os.environ['HOME'], which raises KeyError on Windows
# (HOME is typically unset there) and made the `os.name == 'nt'` branch dead.
HOME = Path.home()
if sys.platform == 'darwin':
    CACHE = HOME / "Library" / "Caches"
elif os.name == 'nt':
    # NOTE(review): preserved from the original; this is a literal
    # "APPDATA" directory under HOME, not %APPDATA% — confirm intent.
    CACHE = HOME / "APPDATA"
else:
    CACHE = HOME / ".cache"
LLAMA_CACHE = Path(os.environ.get('LLAMA_CACHE', (CACHE / "llama.cpp").as_posix()))
if __name__ == '__main__':
    targets = sys.argv[1:]
    if not targets:
        # No arguments: check every GGUF found in the llama.cpp cache.
        targets = [p.as_posix() for p in LLAMA_CACHE.glob('*.gguf')]
    for target in targets:
        try:
            print(f"# {target}")
            if os.path.exists(target):
                kvs = get_local_gguf_kvs(Path(target))
            else:
                kvs = get_hf_gguf_kvs(HuggingFaceModel.parse(target))
            check_gguf(target, kvs)
        except Exception as e:
            # Keep going: report the failure for this model and continue.
            print(f"! {target}: {e}")
            traceback.print_exc()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment