Skip to content

Instantly share code, notes, and snippets.

@deepfates
Created November 17, 2024 20:26
Show Gist options
  • Save deepfates/d152924514b2099d132a203100dfeb24 to your computer and use it in GitHub Desktop.
Save deepfates/d152924514b2099d132a203100dfeb24 to your computer and use it in GitHub Desktop.
Convert a fine-tuning dataset from OpenAI format to ShareGPT format
import json
import argparse
def convert_oai_to_sharegpt(input_file: str, output_file: str):
with open(input_file, 'r') as infile, open(output_file, 'w') as outfile:
for line in infile:
conversation = json.loads(line)
# Skip system messages
for message in conversation["messages"]:
if message.get("role") == "system":
continue
# Adjust the format for ShareGPT
sharegpt_message = {
"from": message["role"],
"value": message["content"]
}
outfile.write(json.dumps(sharegpt_message) + '\n')
def main():
parser = argparse.ArgumentParser(description='Convert conversations_oai.jsonl to conversations_sharegpt.jsonl')
parser.add_argument('input_file', type=str, help='Input JSONL file (conversations_oai.jsonl)')
parser.add_argument('output_file', type=str, help='Output JSONL file (conversations_sharegpt.jsonl)')
args = parser.parse_args()
convert_oai_to_sharegpt(args.input_file, args.output_file)
print(f'Converted {args.input_file} to {args.output_file} successfully!')
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment