prideout · October 15, 2024 12:38
diff --git a/repair.py b/repair.py
 import json

 # Rewrite the "merges" array under the "model" key of a tokenizer JSON file so
 # that every [left, right] pair becomes the single string "left right", and
 # save the result to a new file (the input file is left untouched).

 # Define the input and output file paths
 input_file = 'fused_model/tokenizer.json'
 output_file = 'fused_model/repaired_tokenizer.json'

 # Read the JSON file containing the entire structure
 with open(input_file, 'r', encoding='utf-8') as f:
     data = json.load(f)

 # Access the array in "merges" within the "model" key (empty list if absent)
 merges_array = data.get('model', {}).get('merges', [])

 # Convert each 2-element pair into one space-joined string.
 # Entries that are already strings are kept unchanged, so running this on a
 # file that is already in string form does not wipe out its merges.
 # Entries of any other shape are skipped.
 joined_strings = []
 for entry in merges_array:
     if isinstance(entry, str):
         joined_strings.append(entry)
     elif isinstance(entry, list) and len(entry) == 2:
         joined_strings.append(' '.join(entry))

 # Replace the original "merges" array with the joined strings array
 if 'model' in data:
     data['model']['merges'] = joined_strings

 # Write the modified JSON data back to a new file, preserving Unicode characters
 with open(output_file, 'w', encoding='utf-8') as f:
     json.dump(data, f, ensure_ascii=False, indent=2)

 print(f"Successfully modified the 'merges' array and created {output_file}.")
	import json

	# Rewrite the "merges" array under the "model" key of a tokenizer JSON file so
	# that every [left, right] pair becomes the single string "left right", and
	# save the result to a new file (the input file is left untouched).

	# Define the input and output file paths
	input_file = 'fused_model/tokenizer.json'
	output_file = 'fused_model/repaired_tokenizer.json'

	# Read the JSON file containing the entire structure
	with open(input_file, 'r', encoding='utf-8') as f:
	    data = json.load(f)

	# Access the array in "merges" within the "model" key (empty list if absent)
	merges_array = data.get('model', {}).get('merges', [])

	# Convert each 2-element pair into one space-joined string.
	# Entries that are already strings are kept unchanged, so running this on a
	# file that is already in string form does not wipe out its merges.
	# Entries of any other shape are skipped.
	joined_strings = []
	for entry in merges_array:
	    if isinstance(entry, str):
	        joined_strings.append(entry)
	    elif isinstance(entry, list) and len(entry) == 2:
	        joined_strings.append(' '.join(entry))

	# Replace the original "merges" array with the joined strings array
	if 'model' in data:
	    data['model']['merges'] = joined_strings

	# Write the modified JSON data back to a new file, preserving Unicode characters
	with open(output_file, 'w', encoding='utf-8') as f:
	    json.dump(data, f, ensure_ascii=False, indent=2)

	print(f"Successfully modified the 'merges' array and created {output_file}.")