Created
May 4, 2023 22:43
-
-
Save AaronGoldsmith/bf5a562da9c0616e27cf348555f68588 to your computer and use it in GitHub Desktop.
Generative commit: a prepare-commit-msg git hook that drafts commit messages from the staged diff using the OpenAI API.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import os | |
from dotenv import load_dotenv, find_dotenv | |
import openai | |
import tiktoken | |
# Find and load the .env file from the project root | |
# load_dotenv('.env') | |
def num_tokens_from_string(string: str, model: str = "gpt-3.5-turbo") -> int:
    """Return the number of tokens *string* encodes to for *model*.

    Args:
        string: Text to tokenize.
        model: OpenAI model whose tokenizer to use. Defaults to
            "gpt-3.5-turbo", the model used by generate_summary, so
            existing callers are unaffected.

    Returns:
        The token count as an int.
    """
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(string))
def generate_summary(code_chunks):
    """Generate a commit-message summary for each diff chunk via OpenAI.

    Args:
        code_chunks: Iterable of git-diff text chunks (one per file/hunk,
            as split by the prepare-commit-msg hook).

    Returns:
        A list of commit-message strings, one per chunk that fits within
        the model's token limit. Oversized chunks are skipped with a
        warning on stderr.
    """
    openai.api_key = os.getenv("OPENAI_API_KEY")
    # The system prompt is loop-invariant, so build it once instead of
    # on every iteration.
    system_intro = (
        "You are a commit message generator used in prepare-commit-msg.\n"
        " 1. Review the provided diff\n"
        " 2. Identify which category the diff falls into\n"
        "\t [fix][feat][chore][docs][style][refactor][perf][test]\n"
        "3. respond with [category] <commit message summarizing +/- changes>\n"
        "Constraint: Respond with the unpunctuated commit message related "
        "to the provided code change.\n"
    )
    summaries = []
    for chunk in code_chunks:
        prompt = f"{chunk}\n<<END_DIFF>>\n\n"
        # Count the tokens in the prompt.
        tokens = num_tokens_from_string(prompt)
        # Ensure the number of tokens is within the model's limits
        # (4096 tokens for gpt-3.5-turbo).
        if tokens > 4096:
            # FIX: the warning previously interpolated the entire
            # code_chunks list; report the offending chunk's first line
            # (its diff header) instead.
            header = chunk.splitlines()[0] if chunk else ""
            print(
                f"Warning: Skipping chunk '{header}' due to token limit "
                f"exceeded ({tokens} tokens).",
                file=sys.stderr,
            )
            continue
        # temperature=0 keeps the generated message deterministic.
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            temperature=0,
            messages=[
                {"role": "system", "content": system_intro},
                {"role": "user", "content": prompt},
            ],
        )
        summaries.append(completion.choices[0].message.content)
    return summaries
if __name__ == "__main__":
    # Each CLI argument is one diff chunk; print one summary per line.
    chunks = sys.argv[1:]
    print("\n".join(generate_summary(chunks)))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# prepare-commit-msg hook: when the commit-message template contains the
# <USE_GPT> marker, replace it with GPT-generated summaries of the staged
# diff (via .git/hooks/gpt_commit_msg.py).
#
# FIX: the shebang was /bin/sh, but this script relies on bash-only
# features ([[ ]], arrays, read -ra, <<< herestrings, echo -e); under a
# POSIX sh such as dash it would fail.

# Arguments git passes to prepare-commit-msg hooks:
COMMIT_MSG_FILE=$1   # path of the file that holds the commit message
COMMIT_SOURCE=$2     # where the message came from (message/template/merge/...)
SHA1=$3              # commit SHA when amending; empty otherwise
# Succeed (return 0) when the file named by $1 contains the literal
# marker "<USE_GPT>" on any line; fail (return 1) otherwise.
contains_use_gpt() {
    while IFS= read -r msg_line; do
        case $msg_line in
            *"<USE_GPT>"*) return 0 ;;
        esac
    done < "$1"
    return 1
}
# Only rewrite the message when the template opts in via <USE_GPT>.
if contains_use_gpt "$COMMIT_MSG_FILE"; then
# # Get the added code chunks
ADDED_CODE=$(git diff --cached --patch)
# echo "Added code chunks:"
# echo "$ADDED_CODE"
# Split the added code into chunks (separated by diff headers)
# The awk program accumulates lines into s, flushing s each time a new
# "diff" header starts, so each array element is one whole per-file patch.
IFS=$'\n' read -ra CHUNKS <<< "$(echo "$ADDED_CODE" | awk '/^diff/{if (p) {print s}; s=""; p=1} {s=s $0 RS} END{if (p) print s}')"
# echo "Chunks array:"
# printf '%s\n' "${CHUNKS[@]}"
# Generate commit message summaries using the OpenAI completions endpoint
# (one positional argument per chunk; the script prints one summary per line).
SUMMARIES=$(python ".git/hooks/gpt_commit_msg.py" "${CHUNKS[@]}")
# Remove the original commit message prompt and add the generated summaries
# The perl range match drops git's "Please enter the commit message..." block.
/usr/bin/perl -i.bak -ne 'print unless(m/^. Please enter the commit message/..m/^#$/)' "$COMMIT_MSG_FILE"
# Truncate the message file, then write the generated summaries into it.
echo -n "" > "$COMMIT_MSG_FILE"
echo -e "$SUMMARIES\n" >> "$COMMIT_MSG_FILE"
# exit 1 # Uncomment this line to prevent commits while debugging
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment