Submitting an Asynchronous Large-Batch Request with the OpenAI Python SDK
#!/usr/bin/python
import json
import textwrap
import time

from openai import OpenAI

client = OpenAI()

"""
Create a data file containing a batch of chat messages to complete
"""
filepath = "data.jsonl"
messages = [
    dict(
        messages=[
            dict(
                role="system",
                content="Improve the given code through clear and detailed thinking.",
            ),
            dict(
                role="user",
                # Backslashes are doubled so the prompt contains the literal text "\033"
                content=textwrap.dedent("""\
                    ```python
                    from difflib import Differ
                    def get_diff(before, after):
                        differ = Differ()
                        diff = differ.compare(before.split(), after.split())
                        output = ""
                        for line in diff:
                            if line.startswith("  "):
                                output += line[2:] + " "
                            elif line.startswith("- "):
                                output += f"\\033[91m{line[2:]}\\033[0m"
                            elif line.startswith("+ "):
                                output += f"\\033[92m{line[2:]}\\033[0m "
                            else:
                                output += line
                        return output
                    ```"""),
            ),
        ],
    ),
    # More conversations here (to make it a batch)
]
with open(filepath, "w") as file:
    for i, message in enumerate(messages):
        # Each line must be a request object:
        # https://platform.openai.com/docs/api-reference/batch/requestInput
        request = dict(
            custom_id=f"request-{i}",
            method="POST",
            url="/v1/chat/completions",
            body=dict(model="gpt-4-turbo", **message),  # unpacks into messages=[...]
        )
        json.dump(request, file)
        file.write("\n")

"""
Upload to the storage (file will appear at https://platform.openai.com/storage/files)
"""
response = client.files.create(file=open(filepath, "rb"), purpose="batch")
file_id = response.id
print(f"{file_id=}")

"""
Submit a batch job
"""
response = client.batches.create(
    input_file_id=file_id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
)
batch_id = response.id
print(f"{batch_id=}")

"""
Wait up to 24 hours.
"""
time.sleep(24 * 60 * 60)

"""
Retrieve the generations
"""
response = client.batches.retrieve(batch_id=batch_id)
output_file_id = response.output_file_id
assert output_file_id is not None, "Learn to be patient!"
print(f"{output_file_id=}")

"""
Save to a local file.
"""
content = client.files.content(output_file_id)
content.write_to_file("output.jsonl")
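Sleeping for the full 24-hour window is the blunt approach; batches often finish well before the deadline. Below is a minimal polling sketch that drops in for the "Wait up to 24 hours" step, reusing `client` and `batch_id` from the script above. The status values, `output_file_id`, and `error_file_id` fields follow the Batch API reference; the 60-second interval is an arbitrary choice.

while True:
    batch = client.batches.retrieve(batch_id)
    if batch.status == "completed":
        break
    if batch.status in ("failed", "expired", "cancelled"):
        # Per-request failures, if any, are listed in the file under batch.error_file_id
        raise RuntimeError(f"Batch ended with status {batch.status!r}")
    time.sleep(60)  # arbitrary polling interval

client.files.content(batch.output_file_id).write_to_file("output.jsonl")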
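Once output.jsonl is on disk, each line pairs the custom_id you assigned in the input file with either a response body (a regular chat completion object) or an error object. A rough sketch of reading the completions back, assuming the documented batch output format:

import json

results = {}
with open("output.jsonl") as file:
    for line in file:
        record = json.loads(line)
        if record.get("error"):  # per-request failures are reported inline
            continue
        body = record["response"]["body"]  # a chat completion object
        results[record["custom_id"]] = body["choices"][0]["message"]["content"]

print(results["request-0"])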
Update
The official API Reference now has Python code examples for this feature:
https://platform.openai.com/docs/api-reference/batch/retrieve?lang=python