Submitting an Asynchronous Large-Batch Request with the OpenAI Python SDK
#!/usr/bin/python
import json
import textwrap
import time

from openai import OpenAI

client = OpenAI()

"""
Create a data file containing a batch of chat messages to complete
"""
filepath = "data.jsonl"
messages = [
    dict(
        messages=[
            dict(
                role="system",
                content="Improve the given code through clear and detailed thinking.",
            ),
            dict(
                role="user",
                # Backslashes are doubled so the prompt contains the literal text "\033"
                content=textwrap.dedent("""\
                    ```python
                    from difflib import Differ
                    def get_diff(before, after):
                        differ = Differ()
                        diff = differ.compare(before.split(), after.split())
                        output = ""
                        for line in diff:
                            if line.startswith("  "):
                                output += line[2:] + " "
                            elif line.startswith("- "):
                                output += f"\\033[91m{line[2:]}\\033[0m"
                            elif line.startswith("+ "):
                                output += f"\\033[92m{line[2:]}\\033[0m "
                            else:
                                output += line
                        return output
                    ```"""),
            ),
        ],
    ),
    # More conversations here (to make it a batch)
]
with open(filepath, "w") as file:
    for i, message in enumerate(messages):
        # Each line must be a request object:
        # https://platform.openai.com/docs/api-reference/batch/requestInput
        request = dict(
            custom_id=f"request-{i}",
            method="POST",
            url="/v1/chat/completions",
            body=dict(model="gpt-4-turbo", **message),  # unpacks into messages=[...]
        )
        json.dump(request, file)
        file.write("\n")

"""
Upload to the storage (file will appear at https://platform.openai.com/storage/files)
"""
response = client.files.create(file=open(filepath, "rb"), purpose="batch")
file_id = response.id
print(f"{file_id=}")

"""
Submit a batch job
"""
response = client.batches.create(
    input_file_id=file_id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
)
batch_id = response.id
print(f"{batch_id=}")

"""
Wait up to 24 hours.
"""
time.sleep(24 * 60 * 60)

"""
Retrieve the generations
"""
response = client.batches.retrieve(batch_id=batch_id)
output_file_id = response.output_file_id
assert output_file_id is not None, "Learn to be patient!"
print(f"{output_file_id=}")

"""
Save to a local file.
"""
content = client.files.content(output_file_id)
content.write_to_file("output.jsonl")
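Sleeping for the full 24-hour window is the blunt approach; batches often finish well before the deadline. Below is a minimal polling sketch that drops in for the "Wait up to 24 hours" step, reusing `client` and `batch_id` from the script above. The status values, `output_file_id`, and `error_file_id` fields follow the Batch API reference; the 60-second interval is an arbitrary choice.

while True:
    batch = client.batches.retrieve(batch_id)
    if batch.status == "completed":
        break
    if batch.status in ("failed", "expired", "cancelled"):
        # Per-request failures, if any, are listed in the file under batch.error_file_id
        raise RuntimeError(f"Batch ended with status {batch.status!r}")
    time.sleep(60)  # arbitrary polling interval

client.files.content(batch.output_file_id).write_to_file("output.jsonl")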
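Once output.jsonl is on disk, each line pairs the custom_id you assigned in the input file with either a response body (a regular chat completion object) or an error object. A rough sketch of reading the completions back, assuming the documented batch output format:

import json

results = {}
with open("output.jsonl") as file:
    for line in file:
        record = json.loads(line)
        if record.get("error"):  # per-request failures are reported inline
            continue
        body = record["response"]["body"]  # a chat completion object
        results[record["custom_id"]] = body["choices"][0]["message"]["content"]

print(results["request-0"])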
Update
The official API Reference now has Python code examples for this feature:
https://platform.openai.com/docs/api-reference/batch/retrieve?lang=python