Skip to content

Instantly share code, notes, and snippets.

@city96
Created April 29, 2025 12:39
Show Gist options
  • Save city96/a05cb7ec6664a5085efb007497f2049b to your computer and use it in GitHub Desktop.
Save city96/a05cb7ec6664a5085efb007497f2049b to your computer and use it in GitHub Desktop.
Attempt to recreate tngtech/DeepSeek-R1T-Chimera from quantized files
# Attempt to recreate tngtech/DeepSeek-R1T-Chimera from quantized files
# based on https://huggingface.co/tngtech/DeepSeek-R1T-Chimera/discussions/1
# using:
# - https://huggingface.co/unsloth/DeepSeek-R1-GGUF
# - https://huggingface.co/unsloth/DeepSeek-V3-0324-GGUF
# NOTE: The key mapping might not be 100% correct, feel free to experiment
import gguf
from tqdm import tqdm
# I merged the split files using `llama-gguf-split --merge` first
PATH_R1 = "DeepSeek-R1-UD-Q2_K_XL.gguf"
PATH_V3 = "DeepSeek-V3-0324-UD-Q2_K_XL.gguf"
PATH_OUT = "DeepSeek-R1T-Chimera-UD-Q2_K_XL.gguf"

# Maps a tensor-name substring to the model ("r1" or "v3") that tensor is
# copied from. Entries are tried in insertion order with a plain substring
# test against the tensor name, and the first match wins.
KEY_MAPPING = {
    "token_embd": "v3",
    # The trailing dot is required: a bare "blk.1" is also a substring of
    # "blk.10" .. "blk.19" (and "blk.2" of "blk.20" .. "blk.29"), which would
    # match before "exps" below and wrongly pull those layers' experts from v3.
    # NOTE(review): presumably these are the leading non-MoE layers - confirm.
    "blk.0.": "v3",
    "blk.1.": "v3",
    "blk.2.": "v3",
    "shexp": "v3",
    "exps": "r1",
    "attn": "v3",
    "ffn_gate_inp": "v3",
    # Default is "v3" for the rest
}
def select_source(name, mapping, default="v3"):
    """Return the source-model tag for the tensor called *name*.

    *mapping* maps substrings to tags; entries are tried in insertion order
    and the first pattern that occurs anywhere in *name* wins. *default* is
    returned when no pattern matches.

    Matching is a plain substring test, so patterns must be specific enough
    on their own (e.g. "blk.1" also occurs in "blk.10").
    """
    for pattern, source in mapping.items():
        if pattern in name:
            return source
    return default


def validate_tensor_names(names_r1, names_v3):
    """Raise ValueError unless both iterables hold exactly the same names.

    Checking names (not just counts) up front turns a later bare KeyError
    into an actionable message listing a few of the offending tensors.
    """
    names_r1, names_v3 = set(names_r1), set(names_v3)
    if names_r1 == names_v3:
        return
    only_r1 = sorted(names_r1 - names_v3)
    only_v3 = sorted(names_v3 - names_r1)
    raise ValueError(
        "Tensor names differ between models "
        f"(only in R1: {only_r1[:5]}, only in V3: {only_v3[:5]})"
    )


def main():
    """Merge the R1 and V3 GGUF files into PATH_OUT according to KEY_MAPPING."""
    reader_r1 = gguf.GGUFReader(PATH_R1)
    tensors_r1 = {x.name: x for x in reader_r1.tensors}
    print(f"Read {len(tensors_r1)} tensors from R1 model")

    reader_v3 = gguf.GGUFReader(PATH_V3)
    tensors_v3 = {x.name: x for x in reader_v3.tensors}
    print(f"Read {len(tensors_v3)} tensors from V3 model")

    if len(tensors_r1) != len(tensors_v3):
        raise ValueError("Invalid tensor count in models")
    validate_tensor_names(tensors_r1, tensors_v3)

    # Both name sets are identical at this point; sorting is optional and
    # mostly for readability of the log / output order.
    sd = {}
    for key in sorted(tensors_v3):
        src = select_source(key, KEY_MAPPING)
        print(f"Using src:{src} for {key:40}")
        sd[key] = tensors_r1[key] if src == "r1" else tensors_v3[key]

    writer = gguf.GGUFWriter(PATH_OUT, arch="deepseek2")
    try:
        # reuse v3 metadata
        for field in reader_v3.fields.values():
            # The architecture is already passed to the writer via arch=, and
            # 'GGUF.'-prefixed fields are presumably reader-internal header
            # entries rather than real key/values - skip both.
            if field.name == gguf.Keys.General.ARCHITECTURE or field.name.startswith('GGUF.'):
                continue
            writer.add_key_value(field.name, field.contents(), field.types[0])

        # add tensor info
        for tensor in sd.values():
            writer.add_tensor_info(
                tensor.name, tensor.data.shape, tensor.data.dtype, tensor.data.nbytes, tensor.tensor_type
            )

        total_bytes = sum(tensor.n_bytes for tensor in sd.values())
        with tqdm(desc="Writing", total=total_bytes, unit="byte", unit_scale=True) as bar:
            writer.write_header_to_file()
            writer.write_kv_data_to_file()
            writer.write_ti_data_to_file()
            # add actual tensors
            for tensor in sd.values():
                writer.write_tensor_data(tensor.data)
                bar.update(tensor.n_bytes)
    finally:
        # Close the output file even if a write fails part-way through.
        writer.close()


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment