Created
May 4, 2023 22:43
-
-
Save AaronGoldsmith/bf5a562da9c0616e27cf348555f68588 to your computer and use it in GitHub Desktop.
Generative commit: a prepare-commit-msg git hook that drafts commit messages from the staged diff using the OpenAI API.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import os | |
from dotenv import load_dotenv, find_dotenv | |
import openai | |
import tiktoken | |
# Find and load the .env file from the project root | |
# load_dotenv('.env') | |
def num_tokens_from_string(string: str, model: str = "gpt-3.5-turbo") -> int:
    """Return the number of tokens *string* encodes to for *model*.

    Args:
        string: Text to tokenize.
        model: OpenAI model whose tokenizer to use. Defaults to
            "gpt-3.5-turbo", the model used by generate_summary, so
            existing callers are unaffected.

    Returns:
        The token count as an int.
    """
    encoding = tiktoken.encoding_for_model(model)
    return len(encoding.encode(string))
def generate_summary(code_chunks):
    """Generate a commit-message summary for each diff chunk via OpenAI.

    Args:
        code_chunks: Iterable of git-diff text chunks (one per file/hunk,
            as split by the prepare-commit-msg hook).

    Returns:
        A list of commit-message strings, one per chunk that fits within
        the model's token limit. Oversized chunks are skipped with a
        warning on stderr.
    """
    openai.api_key = os.getenv("OPENAI_API_KEY")
    # The system prompt is loop-invariant, so build it once instead of
    # on every iteration.
    system_intro = (
        "You are a commit message generator used in prepare-commit-msg.\n"
        " 1. Review the provided diff\n"
        " 2. Identify which category the diff falls into\n"
        "\t [fix][feat][chore][docs][style][refactor][perf][test]\n"
        "3. respond with [category] <commit message summarizing +/- changes>\n"
        "Constraint: Respond with the unpunctuated commit message related "
        "to the provided code change.\n"
    )
    summaries = []
    for chunk in code_chunks:
        prompt = f"{chunk}\n<<END_DIFF>>\n\n"
        # Count the tokens in the prompt.
        tokens = num_tokens_from_string(prompt)
        # Ensure the number of tokens is within the model's limits
        # (4096 tokens for gpt-3.5-turbo).
        if tokens > 4096:
            # FIX: the warning previously interpolated the entire
            # code_chunks list; report the offending chunk's first line
            # (its diff header) instead.
            header = chunk.splitlines()[0] if chunk else ""
            print(
                f"Warning: Skipping chunk '{header}' due to token limit "
                f"exceeded ({tokens} tokens).",
                file=sys.stderr,
            )
            continue
        # temperature=0 keeps the generated message deterministic.
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            temperature=0,
            messages=[
                {"role": "system", "content": system_intro},
                {"role": "user", "content": prompt},
            ],
        )
        summaries.append(completion.choices[0].message.content)
    return summaries
if __name__ == "__main__":
    # Each CLI argument is one diff chunk; print one summary per line.
    chunks = sys.argv[1:]
    print("\n".join(generate_summary(chunks)))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# prepare-commit-msg hook: when the commit-message template contains the
# <USE_GPT> marker, replace it with GPT-generated summaries of the staged
# diff (via .git/hooks/gpt_commit_msg.py).
#
# FIX: the shebang was /bin/sh, but this script relies on bash-only
# features ([[ ]], arrays, read -ra, <<< herestrings, echo -e); under a
# POSIX sh such as dash it would fail.

# Arguments git passes to prepare-commit-msg hooks:
COMMIT_MSG_FILE=$1   # path of the file that holds the commit message
COMMIT_SOURCE=$2     # where the message came from (message/template/merge/...)
SHA1=$3              # commit SHA when amending; empty otherwise
# Succeed (return 0) when the file named by $1 contains the literal
# marker "<USE_GPT>" on any line; fail (return 1) otherwise.
contains_use_gpt() {
    while IFS= read -r msg_line; do
        case $msg_line in
            *"<USE_GPT>"*) return 0 ;;
        esac
    done < "$1"
    return 1
}
# Only rewrite the message when the template opts in via <USE_GPT>.
if contains_use_gpt "$COMMIT_MSG_FILE"; then
# # Get the added code chunks
ADDED_CODE=$(git diff --cached --patch)
# echo "Added code chunks:"
# echo "$ADDED_CODE"
# Split the added code into chunks (separated by diff headers)
# The awk program accumulates lines into s, flushing s each time a new
# "diff" header starts, so each array element is one whole per-file patch.
IFS=$'\n' read -ra CHUNKS <<< "$(echo "$ADDED_CODE" | awk '/^diff/{if (p) {print s}; s=""; p=1} {s=s $0 RS} END{if (p) print s}')"
# echo "Chunks array:"
# printf '%s\n' "${CHUNKS[@]}"
# Generate commit message summaries using the OpenAI completions endpoint
# (one positional argument per chunk; the script prints one summary per line).
SUMMARIES=$(python ".git/hooks/gpt_commit_msg.py" "${CHUNKS[@]}")
# Remove the original commit message prompt and add the generated summaries
# The perl range match drops git's "Please enter the commit message..." block.
/usr/bin/perl -i.bak -ne 'print unless(m/^. Please enter the commit message/..m/^#$/)' "$COMMIT_MSG_FILE"
# Truncate the message file, then write the generated summaries into it.
echo -n "" > "$COMMIT_MSG_FILE"
echo -e "$SUMMARIES\n" >> "$COMMIT_MSG_FILE"
# exit 1 # Uncomment this line to prevent commits while debugging
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment