Skip to content

Instantly share code, notes, and snippets.

@SteveBronder
Created March 19, 2025 20:52
Show Gist options
  • Save SteveBronder/97da0eea2c7f58c79e6132b95d504b96 to your computer and use it in GitHub Desktop.
Save SteveBronder/97da0eea2c7f58c79e6132b95d504b96 to your computer and use it in GitHub Desktop.
import csv
import os
import json
import time
from openai import OpenAI
# Shared OpenAI client used by every request in this script.  The API key is
# taken from the OPENAI_API_KEY environment variable; a missing variable
# raises KeyError at import time.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
def read_csv(file_path):
    """Read a CSV file and return its rows as a list of dictionaries.

    The first line of the file is used as the header; each following row
    becomes a dict keyed by those header names.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A list with one dict per data row, in file order.
    """
    # "utf-8-sig" decodes plain UTF-8 and also drops a leading byte-order
    # mark.  With plain "utf-8" a BOM would be glued onto the first header
    # name, so lookups of that column by name would silently miss.
    with open(file_path, newline="", encoding="utf-8-sig") as csvfile:
        return list(csv.DictReader(csvfile))
def generate_json_for_mr(mr):
    """Ask the model to categorize one merge request and return its raw reply.

    The model is instructed to answer with a JSON object shaped like::

        {
            "#<MR_Number>": {
                "<category>": "[#<MR_Number>](<Link>): <One line summary>"
            }
        }

    where ``<category>`` is one of ``breaking_changes``, ``major_features``,
    ``improvements``, ``backend_improvements`` or ``other_changes``.

    Args:
        mr: Mapping describing the merge request.  The keys read are
            ``title_subject``, ``mr_id_link``, ``summary`` and ``merge_date``.

    Returns:
        The model's text output with surrounding whitespace stripped.  The
        text is NOT parsed or validated here; the caller is responsible for
        checking that it is valid JSON.
    """
    prompt = (
        "You are a release notes categorizer for the Eigen C++ library. "
        "Given the following merge request details, output a JSON object with a single key-value pair. "
        "The key should be the merge request number prefixed with a '#' (for example, '#606'). "
        "The value should be an object with one key, which must be one of: "
        "'breaking_changes', 'major_features', 'improvements', 'backend_improvements', or 'other_changes'. "
        "The value corresponding to that key should be a single-line markdown summary that includes the merge request number "
        "as a markdown link (e.g., [#606](https://gitlab.com/libeigen/eigen/-/merge_requests/606)) followed by a concise description. "
        "\n"
        "The output JSON object has the following structure:\n"
        "{\n"
        '"#<MR_Number>": {\n'
        '"<category>": "[#<MR_Number>](<Link>): <One line summary>"\n'
        "}\n"
        "}\n"
        "where <category> is one of: breaking_changes, major_features, improvements,\n"
        "backend_improvements, or other_changes.\n"
        "Do not include any extra text or commentary. "
    )

    def field(name):
        # Rows produced by csv.DictReader always contain every header key:
        # an empty cell is "" and a cell missing from a short row is None.
        # A plain mr.get(name, 'NA') therefore never falls back, so treat
        # any empty value as missing as well.
        return mr.get(name) or "NA"

    input_v = (
        "Here are the merge request details:\n"
        f"Title: {field('title_subject')}\n"
        f"Link: {field('mr_id_link')}\n"
        f"Summary: {field('summary')}\n"
        f"Merge Date: {field('merge_date')}\n\n"
        "Output only a valid JSON object."
    )

    response = client.responses.create(
        model="gpt-4o",
        instructions=prompt,
        input=input_v,
    )
    return response.output_text.strip()
def main():
    """Categorize every merge request in the input CSV and write the results.

    Reads ``eigen_release_notes_with_summary.csv``, requests one model reply
    per row through ``generate_json_for_mr``, and writes:

    * ``mr_release_notes.jsonl`` - one JSON object per line, for every reply
      that parses as JSON;
    * ``mr_release_notes.txt`` - the raw text of every reply that does not
      parse, so it can be inspected and repaired by hand.

    A failure while handling one row is reported on stdout and the loop moves
    on to the next row, so a single bad request does not abort the batch.
    """
    input_csv = "eigen_release_notes_with_summary.csv"
    output_file = "mr_release_notes.jsonl"
    bad_output_file = "mr_release_notes.txt"

    # Read the CSV file containing merge request details.
    rows = read_csv(input_csv)
    total = len(rows)

    with open(output_file, "w", encoding="utf-8") as outfile, \
            open(bad_output_file, "w", encoding="utf-8") as bad_outfile:
        for idx, mr in enumerate(rows, start=1):
            try:
                mr_json_str = generate_json_for_mr(mr)
                # The model sometimes wraps its answer in a Markdown code
                # fence; drop the fence markers before parsing.
                mr_json_str = mr_json_str.replace("```json", "").replace("```", "")
                try:
                    # Validate that the output is valid JSON.
                    # json.JSONDecodeError is a subclass of ValueError.
                    mr_json = json.loads(mr_json_str)
                except ValueError as e:
                    print(f"Error parsing JSON for MR {idx}: {e}")
                    bad_outfile.write(f"{mr_json_str}\n")
                else:
                    # Write the JSON object as one line.
                    outfile.write(json.dumps(mr_json) + "\n")
                print(f"Processed MR {idx}/{total}")
                print("Output:", mr_json_str)
            except Exception as e:
                # Per-row boundary: report the failure and keep going rather
                # than dropping into a debugger or aborting the whole run.
                print(f"Error processing MR {idx}: {e}")
            time.sleep(0.1)  # Adjust delay if necessary to avoid rate limits.

    print(f"All MR release notes have been written to {output_file}")
# Run the batch only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment