city96 · April 29, 2025 12:39
diff --git a/deepseek_r1t_chimera_diy_gguf.py b/deepseek_r1t_chimera_diy_gguf.py
 # Attempt to recreate tngtech/DeepSeek-R1T-Chimera from quantized files
 #  based on https://huggingface.co/tngtech/DeepSeek-R1T-Chimera/discussions/1
 #  using:
 #   - https://huggingface.co/unsloth/DeepSeek-R1-GGUF
 #   - https://huggingface.co/unsloth/DeepSeek-V3-0324-GGUF
 # NOTE: The key mapping might not be 100% correct, feel free to experiment
 import gguf
 from tqdm import tqdm

 # Input/output model files. The upstream split shards were merged into
 # single files with `llama-gguf-split --merge` first.
 PATH_R1 = "DeepSeek-R1-UD-Q2_K_XL.gguf"
 PATH_V3 = "DeepSeek-V3-0324-UD-Q2_K_XL.gguf"
 PATH_OUT = "DeepSeek-R1T-Chimera-UD-Q2_K_XL.gguf"

 # Maps a tensor-name substring to the model ("r1" or "v3") the tensor is
 # taken from. The script tests the patterns in insertion order with
 # `pattern in tensor_name` and the first hit wins, so order matters:
 # "shexp" (shared experts) must stay ahead of "exps" (routed experts).
 KEY_MAPPING = {
    "token_embd": "v3",
    # The trailing dot is required: a bare "blk.1" is also a substring of
    # "blk.10".."blk.19" (and "blk.2" of "blk.20".."blk.29"), which would
    # send the routed experts of those layers to V3 instead of R1.
    "blk.0.": "v3",
    "blk.1.": "v3",
    "blk.2.": "v3",
    "shexp": "v3",
    "exps": "r1",
    "attn": "v3",
    "ffn_gate_inp": "v3",
    # Default is "v3" for the rest
 }

 if __name__ == "__main__":
    # Index the tensors of both source models by name.
    reader_r1 = gguf.GGUFReader(PATH_R1)
    tensors_r1 = {x.name: x for x in reader_r1.tensors}
    print(f"Read {len(tensors_r1)} tensors from R1 model")

    reader_v3 = gguf.GGUFReader(PATH_V3)
    tensors_v3 = {x.name: x for x in reader_v3.tensors}
    print(f"Read {len(tensors_v3)} tensors from V3 model")

    if len(tensors_r1) != len(tensors_v3):
        raise ValueError("Invalid tensor count in models")

    # Equal counts do not imply equal names; fail with a readable message
    # instead of a bare KeyError further down.
    mismatched = sorted(tensors_r1.keys() ^ tensors_v3.keys())
    if mismatched:
        raise ValueError(
            f"Tensor names differ between models, e.g.: {mismatched[:10]}"
        )

    keys = sorted(tensors_r1)  # optional, mostly for readability

    # Pick the source model per tensor: the first KEY_MAPPING pattern that
    # is a substring of the tensor name decides, otherwise fall back to V3.
    sd = {}
    for key in keys:
        src = next((v for k, v in KEY_MAPPING.items() if k in key), "v3")
        print(f"Using src:{src} for {key:40}")
        sd[key] = tensors_r1[key] if src == "r1" else tensors_v3[key]

    writer = gguf.GGUFWriter(PATH_OUT, arch="deepseek2")
    try:
        # Reuse the V3 metadata. The architecture key is skipped because
        # the writer is already constructed with `arch`; the 'GGUF.*'
        # entries are presumably reader-side header fields rather than
        # real key/value pairs -- confirm against the gguf reader.
        for field in reader_v3.fields.values():
            if field.name == gguf.Keys.General.ARCHITECTURE or field.name.startswith('GGUF.'):
                continue
            writer.add_key_value(field.name, field.contents(), field.types[0])

        # Register every tensor before any data is written.
        total_bytes = 0
        for tensor in sd.values():
            total_bytes += tensor.n_bytes
            writer.add_tensor_info(
                tensor.name, tensor.data.shape, tensor.data.dtype, tensor.data.nbytes, tensor.tensor_type
            )

        writer.write_header_to_file()
        writer.write_kv_data_to_file()
        writer.write_ti_data_to_file()

        # Write tensor data in the same order the tensor info was added.
        with tqdm(desc="Writing", total=total_bytes, unit="byte", unit_scale=True) as bar:
            for tensor in sd.values():
                writer.write_tensor_data(tensor.data)
                bar.update(tensor.n_bytes)
    finally:
        # Close the output file even when a write fails part-way through.
        writer.close()
	# Attempt to recreate tngtech/DeepSeek-R1T-Chimera from quantized files
	# based on https://huggingface.co/tngtech/DeepSeek-R1T-Chimera/discussions/1
	# using:
	# - https://huggingface.co/unsloth/DeepSeek-R1-GGUF
	# - https://huggingface.co/unsloth/DeepSeek-V3-0324-GGUF
	# NOTE: The key mapping might not be 100% correct, feel free to experiment
	import gguf
	from tqdm import tqdm

	# Input/output model files. The upstream split shards were merged into
	# single files with `llama-gguf-split --merge` first.
	PATH_R1 = "DeepSeek-R1-UD-Q2_K_XL.gguf"
	PATH_V3 = "DeepSeek-V3-0324-UD-Q2_K_XL.gguf"
	PATH_OUT = "DeepSeek-R1T-Chimera-UD-Q2_K_XL.gguf"

	# Maps a tensor-name substring to the model ("r1" or "v3") the tensor is
	# taken from. The script tests the patterns in insertion order with
	# `pattern in tensor_name` and the first hit wins, so order matters:
	# "shexp" (shared experts) must stay ahead of "exps" (routed experts).
	KEY_MAPPING = {
	    "token_embd": "v3",
	    # The trailing dot is required: a bare "blk.1" is also a substring of
	    # "blk.10".."blk.19" (and "blk.2" of "blk.20".."blk.29"), which would
	    # send the routed experts of those layers to V3 instead of R1.
	    "blk.0.": "v3",
	    "blk.1.": "v3",
	    "blk.2.": "v3",
	    "shexp": "v3",
	    "exps": "r1",
	    "attn": "v3",
	    "ffn_gate_inp": "v3",
	    # Default is "v3" for the rest
	}

	if __name__ == "__main__":
	    # Index the tensors of both source models by name.
	    reader_r1 = gguf.GGUFReader(PATH_R1)
	    tensors_r1 = {x.name: x for x in reader_r1.tensors}
	    print(f"Read {len(tensors_r1)} tensors from R1 model")

	    reader_v3 = gguf.GGUFReader(PATH_V3)
	    tensors_v3 = {x.name: x for x in reader_v3.tensors}
	    print(f"Read {len(tensors_v3)} tensors from V3 model")

	    if len(tensors_r1) != len(tensors_v3):
	        raise ValueError("Invalid tensor count in models")

	    # Equal counts do not imply equal names; fail with a readable message
	    # instead of a bare KeyError further down.
	    mismatched = sorted(tensors_r1.keys() ^ tensors_v3.keys())
	    if mismatched:
	        raise ValueError(
	            f"Tensor names differ between models, e.g.: {mismatched[:10]}"
	        )

	    keys = sorted(tensors_r1)  # optional, mostly for readability

	    # Pick the source model per tensor: the first KEY_MAPPING pattern that
	    # is a substring of the tensor name decides, otherwise fall back to V3.
	    sd = {}
	    for key in keys:
	        src = next((v for k, v in KEY_MAPPING.items() if k in key), "v3")
	        print(f"Using src:{src} for {key:40}")
	        sd[key] = tensors_r1[key] if src == "r1" else tensors_v3[key]

	    writer = gguf.GGUFWriter(PATH_OUT, arch="deepseek2")
	    try:
	        # Reuse the V3 metadata. The architecture key is skipped because
	        # the writer is already constructed with `arch`; the 'GGUF.*'
	        # entries are presumably reader-side header fields rather than
	        # real key/value pairs -- confirm against the gguf reader.
	        for field in reader_v3.fields.values():
	            if field.name == gguf.Keys.General.ARCHITECTURE or field.name.startswith('GGUF.'):
	                continue
	            writer.add_key_value(field.name, field.contents(), field.types[0])

	        # Register every tensor before any data is written.
	        total_bytes = 0
	        for tensor in sd.values():
	            total_bytes += tensor.n_bytes
	            writer.add_tensor_info(
	                tensor.name, tensor.data.shape, tensor.data.dtype, tensor.data.nbytes, tensor.tensor_type
	            )

	        writer.write_header_to_file()
	        writer.write_kv_data_to_file()
	        writer.write_ti_data_to_file()

	        # Write tensor data in the same order the tensor info was added.
	        with tqdm(desc="Writing", total=total_bytes, unit="byte", unit_scale=True) as bar:
	            for tensor in sd.values():
	                writer.write_tensor_data(tensor.data)
	                bar.update(tensor.n_bytes)
	    finally:
	        # Close the output file even when a write fails part-way through.
	        writer.close()