Skip to content

Instantly share code, notes, and snippets.

@xzuyn
Last active April 7, 2024 03:49
Show Gist options
  • Select an option

  • Save xzuyn/765157fa27738a9888dcb4e0aa3f5118 to your computer and use it in GitHub Desktop.

Select an option

Save xzuyn/765157fa27738a9888dcb4e0aa3f5118 to your computer and use it in GitHub Desktop.
import json
import os
def load_json_or_jsonl(file_path):
    """Load a file that holds either one JSON document or JSON Lines.

    The file is first parsed as a single JSON document. If that fails, it is
    re-read line by line and every line that parses as JSON is collected;
    lines that fail to parse are reported with ``print`` and skipped.

    Args:
        file_path: Path of the file to read.

    Returns:
        The decoded JSON document, or a list with one decoded value per valid
        line when the file is JSON Lines. ``None`` if the file does not exist
        (a message is printed in that case).
    """
    try:
        # Explicit UTF-8: without it the decoding depends on the platform
        # locale, which is not UTF-8 everywhere.
        with open(file_path, "r", encoding="utf-8") as file:
            try:
                # Try loading the entire file as one JSON document.
                return json.load(file)
            except json.JSONDecodeError:
                # Not a single document: fall back to JSON Lines.
                file.seek(0)  # json.load consumed the file; rewind.
                json_lines_data = []
                for line in file:
                    stripped = line.strip()
                    if not stripped:
                        # Blank lines (e.g. a trailing newline) are not
                        # records and should not be reported as errors.
                        continue
                    try:
                        json_lines_data.append(json.loads(stripped))
                    except json.JSONDecodeError as e:
                        print(f"Error decoding JSON in line: {e}")
                return json_lines_data
    except FileNotFoundError:
        print(f"File not found: {file_path}")
        return None
def convert_to_desired_format(data):
    """Turn preference records into prompt / chosen / rejected dialogues.

    Each input record is read through its "instruction", "chosen_response"
    and "rejected_response" keys; the three values are stripped of
    surrounding whitespace. A record is kept only when all three stripped
    values are non-empty.

    Args:
        data: Iterable of mappings carrying the three keys above.

    Returns:
        A list of dicts with keys "prompt" (the stripped instruction),
        "chosen" and "rejected" (each a two-message user/assistant dialogue).
    """

    def as_dialogue(question, answer):
        # One user turn followed by one assistant turn.
        return [
            {"content": question, "role": "user"},
            {"content": answer, "role": "assistant"},
        ]

    records = []
    for entry in data:
        question = entry["instruction"].strip()
        preferred = entry["chosen_response"].strip()
        dispreferred = entry["rejected_response"].strip()

        # Skip records where any of the three texts is empty after stripping.
        if not (question and preferred and dispreferred):
            continue

        records.append(
            {
                "prompt": question,
                "chosen": as_dialogue(question, preferred),
                "rejected": as_dialogue(question, dispreferred),
            }
        )
    return records
# Load the source preference dataset (a JSON document or JSON Lines).
json_data = load_json_or_jsonl(
    "./downloaded_datasets/argilla_distilabel-math-preference-dpo-train.jsonl"
)
if json_data is None:
    # load_json_or_jsonl returns None (after printing a message) when the
    # input file is missing; stop here instead of failing inside the
    # converter with an unrelated TypeError.
    raise SystemExit(1)

# Convert data to desired format
converted_data = convert_to_desired_format(json_data)

# Write converted data to a new JSON file
output_path = (
    './downloaded_datasets/argilla/argilla_distilabel-math-preference-dpo-train.json'
)
# The "argilla" sub-directory is not guaranteed to exist yet; open(..., 'w')
# does not create missing parent directories.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, 'w', encoding="utf-8") as f:
    json.dump(converted_data, f, indent=2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment