Skip to content

Instantly share code, notes, and snippets.

@AaronGoldsmith
Created May 4, 2023 22:43
Show Gist options
  • Save AaronGoldsmith/bf5a562da9c0616e27cf348555f68588 to your computer and use it in GitHub Desktop.
Save AaronGoldsmith/bf5a562da9c0616e27cf348555f68588 to your computer and use it in GitHub Desktop.
Generative commit: a prepare-commit-msg Git hook that asks GPT-3.5 to write the commit message from the staged diff.
import sys
import os
from dotenv import load_dotenv, find_dotenv
import openai
import tiktoken
# Find and load the .env file from the project root
# load_dotenv('.env')
def num_tokens_from_string(string: str, model: str = "gpt-3.5-turbo") -> int:
    """Return the number of tokens *string* encodes to for *model*.

    Generalized: the tokenizer model is now a parameter (defaulting to the
    model this hook actually calls) instead of a hard-coded constant.

    Args:
        string: Text to tokenize.
        model: OpenAI model name whose tokenizer to use.

    Returns:
        The token count as an int.
    """
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(string))
def generate_summary(code_chunks):
    """Generate one commit-message summary per staged-diff chunk.

    Sends each chunk to gpt-3.5-turbo with a fixed system prompt that asks
    for a conventional-commit style one-liner.

    Args:
        code_chunks: Iterable of diff-text strings, one per changed file.

    Returns:
        List of commit-message strings. Chunks whose prompt would exceed
        the model's 4096-token context are skipped with a stderr warning.
    """
    openai.api_key = os.getenv("OPENAI_API_KEY")
    # Loop-invariant system prompt — build it once, not per chunk.
    system_intro = (
        "You are a commit message generator used in prepare-commit-msg.\n "
        "1. Review the provided diff\n 2. Identify which category the diff falls into "
        "\n\t [fix][feat][chore][docs][style][refactor][perf][test] "
        "\n3. respond with [category] <commit message summarizing +/- changes> "
        "Constraint: Respond with the unpunctuated commit message related to the provided code change.\n"
    )
    # The system prompt's token cost is the same for every chunk.
    system_tokens = num_tokens_from_string(system_intro)
    summaries = []
    for chunk in code_chunks:
        prompt = f"{chunk}\n<<END_DIFF>>\n\n"
        # Budget both messages we send, not just the user prompt
        # (4096-token context window for gpt-3.5-turbo).
        tokens = system_tokens + num_tokens_from_string(prompt)
        if tokens > 4096:
            # Bug fix: report the offending chunk, not the entire list.
            print(f"Warning: Skipping file '{chunk}' due to token limit exceeded ({tokens} tokens).", file=sys.stderr)
            continue
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            temperature=0,  # deterministic output for reproducible messages
            messages=[
                {"role": "system", "content": system_intro},
                {"role": "user", "content": prompt},
            ],
        )
        summaries.append(completion.choices[0].message.content)
    return summaries
if __name__ == "__main__":
    # Each CLI argument is one diff chunk; print one summary per line.
    chunk_args = sys.argv[1:]
    print("\n".join(generate_summary(chunk_args)))
#!/bin/sh
# prepare-commit-msg hook arguments (githooks convention):
#   $1 - path of the file that holds the commit message
#   $2 - source of the message (message/template/merge/squash/commit); may be empty
#   $3 - commit SHA when amending/reusing a commit
# NOTE(review): only $1 is actually used below; $2/$3 are captured but unused.
COMMIT_MSG_FILE=$1
COMMIT_SOURCE=$2
SHA1=$3
# contains_use_gpt FILE
# Succeed (return 0) if any line of FILE contains the literal <USE_GPT>
# marker; return 1 otherwise.
# Fix: the original used `[[ ... ]]`, a bashism that is not guaranteed to
# work under this script's #!/bin/sh shebang — replaced with a POSIX
# `case` pattern match, which behaves identically here.
contains_use_gpt() {
    while IFS= read -r line; do
        case $line in
            *"<USE_GPT>"*) return 0 ;;
        esac
    done < "$1"
    return 1
}
# Only rewrite the message when the author opted in by typing the
# <USE_GPT> marker into the commit-message buffer.
if contains_use_gpt "$COMMIT_MSG_FILE"; then
# # Get the added code chunks
# Despite the name, this is the full staged patch (context + additions +
# deletions), one `diff` header per changed file.
ADDED_CODE=$(git diff --cached --patch)
# echo "Added code chunks:"
# echo "$ADDED_CODE"
# Split the added code into chunks (separated by diff headers).
# The awk program prints one record per `diff` header, each record keeping
# its embedded newlines.
# NOTE(review): `read` stops at the first newline, so CHUNKS likely holds
# only the first line of the first record rather than one element per
# file; also `read -a`, arrays, `<<<`, and `$'\n'` are bashisms under the
# #!/bin/sh shebang. Verify with a multi-file staged diff.
IFS=$'\n' read -ra CHUNKS <<< "$(echo "$ADDED_CODE" | awk '/^diff/{if (p) {print s}; s=""; p=1} {s=s $0 RS} END{if (p) print s}')"
# echo "Chunks array:"
# printf '%s\n' "${CHUNKS[@]}"
# Generate commit message summaries using the OpenAI completions endpoint;
# each array element is passed as a separate argv entry to the script.
SUMMARIES=$(python ".git/hooks/gpt_commit_msg.py" "${CHUNKS[@]}")
# Remove the original commit message prompt (git's commented help block)
# from the message file, keeping a .bak backup.
/usr/bin/perl -i.bak -ne 'print unless(m/^. Please enter the commit message/..m/^#$/)' "$COMMIT_MSG_FILE"
# NOTE(review): the perl edit above is immediately discarded — the next
# line truncates the file before the generated summaries are appended.
echo -n "" > "$COMMIT_MSG_FILE"
echo -e "$SUMMARIES\n" >> "$COMMIT_MSG_FILE"
# exit 1 # Uncomment this line to prevent commits while debugging
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment