Created
March 19, 2025 20:52
-
-
Save SteveBronder/97da0eea2c7f58c79e6132b95d504b96 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import os | |
import json | |
import time | |
from openai import OpenAI | |
# Module-level OpenAI client shared by every request in this script.
# The API key is read from the OPENAI_API_KEY environment variable; the
# subscript lookup raises KeyError at import time when it is not set.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
def read_csv(file_path):
    """Load a UTF-8 CSV file and return its data rows as a list of dicts.

    The first row of the file supplies the dictionary keys.
    """
    # newline="" lets the csv module handle line endings itself.
    with open(file_path, newline="", encoding="utf-8") as handle:
        records = [record for record in csv.DictReader(handle)]
    return records
def generate_json_for_mr(mr):
    """
    Ask the model to categorize one merge request and return its raw reply.

    Args:
        mr: Mapping for one CSV row. The keys read are ``title_subject``,
            ``mr_id_link``, ``summary`` and ``merge_date``. A key that is
            absent, ``None`` or empty is sent to the model as ``NA``.

    Returns:
        The model's reply text with surrounding whitespace stripped. The
        prompt asks for a JSON object shaped like::

            {
              "#<MR_Number>": {
                "<category>": "[#<MR_Number>](<Link>): <One line summary>"
              }
            }

        where <category> is one of: breaking_changes, major_features,
        improvements, backend_improvements, or other_changes. The reply is
        not parsed or validated here.
    """
    # NOTE(review): the structure example is written flush-left, as it
    # appears in this copy of the file; confirm against the original if the
    # exact whitespace of the prompt matters.
    prompt = (
        "You are a release notes categorizer for the Eigen C++ library. "
        "Given the following merge request details, output a JSON object with a single key-value pair. "
        "The key should be the merge request number prefixed with a '#' (for example, '#606'). "
        "The value should be an object with one key, which must be one of: "
        "'breaking_changes', 'major_features', 'improvements', 'backend_improvements', or 'other_changes'. "
        "The value corresponding to that key should be a single-line markdown summary that includes the merge request number "
        "as a markdown link (e.g., [#606](https://gitlab.com/libeigen/eigen/-/merge_requests/606)) followed by a concise description. "
        "\nThe output JSON object has the following structure:\n"
        "{\n"
        '"#<MR_Number>": {\n'
        '"<category>": "[#<MR_Number>](<Link>): <One line summary>"\n'
        "}\n"
        "}\n"
        "where <category> is one of: breaking_changes, major_features, improvements,\n"
        "backend_improvements, or other_changes.\n"
        "Do not include any extra text or commentary. "
    )

    def field(key):
        # csv.DictReader stores None for short rows and "" for empty cells,
        # so a plain .get(key, "NA") default would never apply to them.
        return mr.get(key) or "NA"

    input_v = (
        "Here are the merge request details:\n"
        f"Title: {field('title_subject')}\n"
        f"Link: {field('mr_id_link')}\n"
        f"Summary: {field('summary')}\n"
        f"Merge Date: {field('merge_date')}\n\n"
        "Output only a valid JSON object."
    )

    response = client.responses.create(
        model="gpt-4o",
        instructions=prompt,
        input=input_v,
    )
    return response.output_text.strip()
def main():
    """
    Categorize every merge request in the input CSV and write JSON Lines.

    Reads ``eigen_release_notes_with_summary.csv`` and requests a
    categorization for each row. Each reply that parses as JSON is written
    to ``mr_release_notes.jsonl``, one object per line. Replies that do not
    parse are written (after code-fence stripping) to
    ``mr_release_notes.txt``. A failure while requesting one row is printed
    and the run continues with the next row.
    """
    input_csv = "eigen_release_notes_with_summary.csv"
    output_file = "mr_release_notes.jsonl"
    bad_output_file = "mr_release_notes.txt"

    # Read the CSV file containing merge request details.
    rows = read_csv(input_csv)
    total = len(rows)

    with open(output_file, "w", encoding="utf-8") as outfile, \
            open(bad_output_file, "w", encoding="utf-8") as bad_outfile:
        for idx, mr in enumerate(rows, start=1):
            try:
                mr_json_str = generate_json_for_mr(mr)
            except Exception as e:
                # Per-row boundary: report the failure and keep going so one
                # bad request does not abort the whole batch.
                print(f"Error processing MR {idx}: {e}")
            else:
                # The model sometimes wraps its reply in a Markdown code
                # fence; drop the fence markers before parsing.
                mr_json_str = mr_json_str.replace("```json", "")
                mr_json_str = mr_json_str.replace("```", "")
                try:
                    mr_json = json.loads(mr_json_str)
                except json.JSONDecodeError as e:
                    print(f"Error parsing JSON for MR {idx}: {e}")
                    # Keep the unparseable reply for manual inspection.
                    bad_outfile.write(f"{mr_json_str}\n")
                else:
                    # Re-serialize so each object occupies exactly one line.
                    outfile.write(json.dumps(mr_json) + "\n")
                print(f"Processed MR {idx}/{total}")
                print("Output:", mr_json_str)
            time.sleep(0.1)  # Adjust delay if necessary to avoid rate limits.

    print(f"All MR release notes have been written to {output_file}")
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment