gitlab-cr with llm
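
A single-file script that pulls a commit's diff from the GitLab API, asks an Azure-hosted GPT-4o deployment to review each changed file, and posts the review back as commit comments. It reads the project path and commit SHA from the CI_PROJECT_PATH and CI_COMMIT_SHA environment variables, so it is presumably meant to run inside a GitLab CI job; the API keys are passed the same way, e.g. GPT_KEY=xxx GITLAB_KEY=xxx python3 gitlab-ci.py (see the error hint in the script).
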
#!/usr/bin/env python3
import json
# import sys
import logging
import os
import pathlib
import subprocess
import time
from urllib import parse, request
from urllib.error import HTTPError

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger()
# create a token at https://{gitlab_domain}.com/-/profile/personal_access_tokens
gitlab_key = os.getenv("GITLAB_KEY")
gpt_key = os.getenv("GPT_KEY")
if not gitlab_key or not gpt_key:
    logger.error(
        "GITLAB_KEY and GPT_KEY are required: "
        "$ GPT_KEY=xxx GITLAB_KEY=xxx python3 gitlab-ci.py"
    )
    exit(1)
__base_url__ = "https://{gitlab_domain}.com/api/v4"
__gpt_url__ = "https://{gpt_selfhost}.openai.azure.com"
__ignore_suffix__ = os.getenv("__IGNORE_SUFFIX__", "md,so,bin,jpg,gif,png").split(",")
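
# Minimal urllib wrapper for the GitLab REST API: prefixes __base_url__, sends the
# PRIVATE-TOKEN header and decodes the JSON response.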
def req(url, data=None, headers=None, method="GET"):
    if data:
        data = json.dumps(dict(query=data)).encode()
    q = request.Request(
        __base_url__ + url,
        headers=headers
        or {
            "PRIVATE-TOKEN": gitlab_key,
            "Content-Type": "application/json",
        },
        data=data,
        method=method,
    )
    return json.load(request.urlopen(q))
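
# Run a shell command and return its stdout as bytes (used by the local-LLM fallback below).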
def execute(cmd):
    """
    >>> execute('ls readme.md')
    b'readme.md\\n'
    """
    logger.info(cmd)
    process = subprocess.Popen(
        cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    (result, error) = process.communicate()
    rc = process.wait()
    if rc != 0:
        logger.error("%s failed: %s", cmd, error)
    return result
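
# Ask an Azure OpenAI chat-completions deployment to review a diff block. The deployment
# name "gpt-4o" and api-version are specific to the author's endpoint. On HTTP 429 the
# call sleeps for Retry-After seconds and retries exactly once.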
def code_review_by_gpt(diff_content, model="gpt-4o", ignore=False):
    url = (
        f"{__gpt_url__}/openai/deployments/gpt-4o/chat/completions?api-version=2024-02-01"
    )
    headers = {
        "Content-Type": "application/json",
        "api-key": gpt_key,
    }
    data = {
        "messages": [
            {
                "role": "system",
"content": 'You are an AI programming assistant.\nWhen asked for your name, you must respond with "GitHub Copilot".\nFollow the user\'s requirements carefully & to the letter.\nFollow Microsoft content policies.\nAvoid content that violates copyrights.\nIf you are asked to generate content that is harmful, hateful, racist, sexist, lewd, violent, or completely irrelevant to software engineering, only respond with "Sorry, I can\'t assist with that."\nKeep your answers short and impersonal.\nYou can answer general programming questions and perform the following tasks: \n* Ask a question about the files in your current workspace\n* Explain how the code in your active editor works\n* Review the selected code in your active editor\n* Generate unit tests for the selected code\n* Propose a fix for the problems in the selected code\n* Scaffold code for a new workspace\n* Create a new Jupyter Notebook\n* Find relevant code to your query\n* Propose a fix for the a test failure\n* Ask questions about VS Code\n* Generate query parameters for workspace search\n* Ask about VS Code extension development\n* Ask how to do something in the terminal\n* Explain what just happened in the terminal\nYou use the GPT-4 Turbo version of OpenAI\'s GPT models.\nFirst think step-by-step - describe your plan for what to build in pseudocode, written out in great detail.\nThen output the code in a single code block.\nMinimize any other prose.\nUse Markdown formatting in your answers.\nMake sure to include the programming language name at the start of the Markdown code blocks.\nAvoid wrapping the whole response in triple backticks.\nThe user works in an IDE called Visual Studio Code which has a concept for editors with open files, integrated unit test support, an output pane that shows the output of running the code as well as an integrated terminal.\nThe user is working on a Windows machine. Please respond with system specific commands if applicable.\nThe active document is the source code the user is looking at right now.\nYou can only give one reply for each conversation turn.\nRespond in the following locale: zh-cn', | |
            },
            {
                "role": "user",
                "content": f"{diff_content} Code review",
            },
        ],
        "model": model,
        "max_tokens": 4096,
        "temperature": 0.1,
        "top_p": 1,
        "n": 1,
        "stream": False,
    }
    data = json.dumps(data).encode()
    q = request.Request(url, headers=headers, data=data)
    try:
        res = request.urlopen(q)
        return json.load(res)["choices"][0]["message"]["content"]
    except HTTPError as e:
        if e.code == 429 and not ignore:
            print(e.headers)
            time.sleep(int(e.headers.get("Retry-After", 5)))
            return code_review_by_gpt(diff_content, ignore=True)
        # other HTTP errors (or a second 429) fall through and return None
        logger.error("code review request failed: %s", e)
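
# Offline alternative (currently unused): run the review prompt through a local
# llama-cli binary instead of the hosted GPT deployment.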
def code_review_by_localLLM(block):  # unused
    prompt = 'You are an AI programming assistant.\nWhen asked for your name, you must respond with "GitHub Copilot".\nFollow the user\'s requirements carefully & to the letter.\nFollow Microsoft content policies.\nAvoid content that violates copyrights.\nIf you are asked to generate content that is harmful, hateful, racist, sexist, lewd, violent, or completely irrelevant to software engineering, only respond with "Sorry, I can\'t assist with that."\nKeep your answers short and impersonal.\nYou can answer general programming questions and perform the following tasks: \n* Ask a question about the files in your current workspace\n* Explain how the code in your active editor works\n* Review the selected code in your active editor\n* Generate unit tests for the selected code\n* Propose a fix for the problems in the selected code\n* Scaffold code for a new workspace\n* Create a new Jupyter Notebook\n* Find relevant code to your query\n* Propose a fix for the a test failure\n* Ask questions about VS Code\n* Generate query parameters for workspace search\n* Ask about VS Code extension development\n* Ask how to do something in the terminal\n* Explain what just happened in the terminal\nYou use the GPT-4 Turbo version of OpenAI\'s GPT models.\nFirst think step-by-step - describe your plan for what to build in pseudocode, written out in great detail.\nThen output the code in a single code block.\nMinimize any other prose.\nUse Markdown formatting in your answers.\nMake sure to include the programming language name at the start of the Markdown code blocks.\nAvoid wrapping the whole response in triple backticks.\nThe user works in an IDE called Visual Studio Code which has a concept for editors with open files, integrated unit test support, an output pane that shows the output of running the code as well as an integrated terminal.\nThe user is working on a Windows machine. Please respond with system specific commands if applicable.\nThe active document is the source code the user is looking at right now.\nYou can only give one reply for each conversation turn.\nRespond in the following locale: zh-cn'
    # alternative local models:
    # ~/Public/models/gemma_2_chinese_chat_q4_k_m.gguf --temp 0.5 -p \\
    # ~/Public/models/codellama-7b.Q4_K_M.gguf --temp 0.5 -p \\
    # The Chinese prompt below reads: "As a professional programmer, please review the
    # following code changes and give suggestions and a summary of the logic:".
    cmd = f"""bash -c \" ~/bin/llama-cli --log-disable -m ~/Public/models/gemma_2_chinese_chat_q4_k_m.gguf --temp 0.5 -p \'作为一名专业的程序员,请对以下代码变更内容进行审查,并给出修改意见和逻辑总结: {block}\' \"
    """.encode("utf8")
    # logger.info(f'\r\r======\r{cmd}')
    return execute(cmd)
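
# Post a line comment on a commit through the GitLab "commits comments" API
# (form-encoded body, unlike the JSON helper above).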
def add_comment(url, note, path, line):
    # print(note)
    q = request.Request(
        __base_url__ + url,
        headers={"PRIVATE-TOKEN": gitlab_key},
        data=parse.urlencode(
            {
                "note": "Auto Code Review by GPT-4o: \n" + note,
                "path": path,
                "line": line,
                "line_type": "new",
            }
        ).encode(),
        method="POST",
    )
    return json.load(request.urlopen(q))
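
# Helper for manual testing (not called from main): log the diff of the most recent
# commit on a branch.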
def get_commits(pid, branch="master"):
    """
    >>> get_commits('tech-platform%2Fmagical', branch='llm-cr')
    """
    r = req(f"/projects/{pid}/repository/commits?ref_name={branch}")
    for x in r[0:1]:
        df = req(f'/projects/{pid}/repository/commits/{x["id"]}/diff')
        logger.info(df)
        if not df:
            logger.error("Failed to get diff content")
def start_review(pid, sha, dry_run=False):
    """
    >>> start_review('tech-platform%2Fmagical', '947153c5420d765c2d0a1e729c629d8981ecbd26', dry_run=0)
    """
    df = req(f"/projects/{pid}/repository/commits/{sha}/diff")
    if not df:
        logger.error("Failed to get diff content")
    for block in df:
        if pathlib.Path(block["new_path"]).suffix[1:] in __ignore_suffix__:
            logger.warning(f'\r\rIgnored file: {block["new_path"]}')
            continue
        # the whole diff block (a dict including the "diff" text) is serialised into the prompt
        note = code_review_by_gpt(block)
        logger.info(f'\r\r======\r {block["new_path"]} \r\r {note}')
        if dry_run:
            return
        if not note:
            continue
        cmt = add_comment(
            url=f"/projects/{pid}/repository/commits/{sha}/comments",
            note=note,
            path=block["new_path"],
            # take the old-file start line from the "@@ -start,count +start,count @@" hunk header
            line=int(block["diff"].split("@@")[1].split(",")[0].replace(" -", "")),
        )
        logger.info(f"\r\r post comment: \r\r{cmt}")
def main():
    project = os.getenv("CI_PROJECT_PATH")
    sha = os.getenv("CI_COMMIT_SHA")
    if project and sha:
        pid = parse.quote_plus(project)
        start_review(pid, sha)
__doc__ = """ | |
>>> execute('echo 123') | |
b'123\\n' | |
>>> pathlib.Path('a.jpg').suffix[1:] in __ignore_suffix__ | |
True | |
""" | |
if __name__ == "__main__": | |
main() |
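
# A possible local smoke test (the project path and commit SHA are placeholders, not from
# this gist): run GPT_KEY=xxx GITLAB_KEY=xxx python3 -i gitlab-ci.py and then call
# start_review("group%2Fproject", "<commit sha>", dry_run=True) to log a single review
# without posting any comment.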