Created
October 24, 2024 11:16
-
-
Save ngxson/62e897b6bfc325e4bcf52d16444a8025 to your computer and use it in GitHub Desktop.
Generate a dummy model with FIM (fill-in-the-middle) token IDs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gguf | |
def decode_field(field: gguf.ReaderField):
    """Convert a GGUF reader field into a plain Python value.

    Returns ``None`` when ``field`` is falsy or carries no type information.
    Otherwise the result depends on the first entry of ``field.types``:

    * ARRAY whose last type entry is STRING -> list of UTF-8 decoded strings,
      one per index listed in ``field.data``.
    * any other ARRAY -> flat list of the ``tolist()`` values of every part
      referenced by ``field.data``.
    * STRING -> the last part decoded as UTF-8.
    * anything else -> the first element of the last part.
    """
    if not field or not field.types:
        return None

    value_type = gguf.GGUFValueType
    kind = field.types[0]

    if kind == value_type.ARRAY:
        # The last type entry is treated as the element type of the array.
        element_kind = field.types[-1]
        if element_kind == value_type.STRING:
            return [str(bytes(field.parts[i]), encoding='utf-8') for i in field.data]
        flattened = []
        for i in field.data:
            flattened.extend(field.parts[i].tolist())
        return flattened

    if kind == value_type.STRING:
        return str(bytes(field.parts[-1]), encoding='utf-8')
    return field.parts[-1][0]
# Copy 'stories260K.gguf' to 'stories260K-infill.gguf', adding the FIM
# (fill-in-the-middle) special-token IDs to the tokenizer metadata.
reader = gguf.GGUFReader('stories260K.gguf', 'r')
print(reader)
writer = gguf.GGUFWriter('stories260K-infill.gguf', arch='llama')

# Extra tokenizer metadata written on top of the copied key/value pairs.
_FIM_TOKEN_IDS = (
    ("tokenizer.ggml.fim_pre_token_id", 500),
    ("tokenizer.ggml.fim_suf_token_id", 501),
    ("tokenizer.ggml.fim_mid_token_id", 502),
    ("tokenizer.ggml.fim_pad_token_id", 503),
    ("tokenizer.ggml.fim_rep_token_id", 504),
    ("tokenizer.ggml.fim_sep_token_id", 505),
)

try:
    for field in reader.fields.values():
        # The architecture key is supplied through ``arch=`` above, so it
        # must not be copied a second time.
        if field.name == 'general.architecture':
            continue
        # Entries named 'GGUF.*' (version, tensor count, kv count) describe
        # the file header rather than model metadata; copying them would add
        # bogus keys to the output file.
        if field.name.startswith('GGUF.'):
            continue
        writer.add_key_value(field.name, decode_field(field), field.types[0])

    for key, token_id in _FIM_TOKEN_IDS:
        writer.add_uint32(key, token_id)

    # Tensor infos are registered before the header, key/value and
    # tensor-info sections are written; tensor payloads are written last.
    for tensor in reader.tensors:
        writer.add_tensor_info(tensor.name, tensor.data.shape, tensor.data.dtype, tensor.data.nbytes, tensor.tensor_type)

    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.write_ti_data_to_file()

    for tensor in reader.tensors:
        writer.write_tensor_data(tensor.data)
finally:
    # Release the output file handle even if copying fails part-way.
    writer.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment