gitlab-cr with llm
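
A single-file script that pulls a commit's diff from the GitLab API, asks an Azure-hosted GPT-4o deployment to review each changed file, and posts the review back as commit comments. It reads the project path and commit SHA from the CI_PROJECT_PATH and CI_COMMIT_SHA environment variables, so it is presumably meant to run inside a GitLab CI job; the API keys are passed the same way, e.g. GPT_KEY=xxx GITLAB_KEY=xxx python3 gitlab-ci.py (see the error hint in the script).
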
#!/usr/bin/env python3
import json
# import sys
import logging
import os
import pathlib
import subprocess
import time
from urllib import parse, request
from urllib.error import HTTPError

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger()
# create a token at https://{gitlab_domain}.com/-/profile/personal_access_tokens
gitlab_key = os.getenv("GITLAB_KEY")
gpt_key = os.getenv("GPT_KEY")
if not gitlab_key or not gpt_key:
    logger.error(
        "GITLAB_KEY and GPT_KEY are required: "
        "$ GPT_KEY=xxx GITLAB_KEY=xxx python3 gitlab-ci.py"
    )
    exit(1)
__base_url__ = "https://{gitlab_domain}.com/api/v4"
__gpt_url__ = "https://{gpt_selfhost}.openai.azure.com"
__ignore_suffix__ = os.getenv("__IGNORE_SUFFIX__", "md,so,bin,jpg,gif,png").split(",")
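
# Minimal urllib wrapper for the GitLab REST API: prefixes __base_url__, sends the
# PRIVATE-TOKEN header and decodes the JSON response.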
def req(url, data=None, headers=None, method="GET"):
    if data:
        data = json.dumps(dict(query=data)).encode()
    q = request.Request(
        __base_url__ + url,
        headers=headers
        or {
            "PRIVATE-TOKEN": gitlab_key,
            "Content-Type": "application/json",
        },
        data=data,
        method=method,
    )
    return json.load(request.urlopen(q))
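
# Run a shell command and return its stdout as bytes (used by the local-LLM fallback below).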
def execute(cmd):
    """
    >>> execute('ls readme.md')
    b'readme.md\\n'
    """
    logger.info(cmd)
    process = subprocess.Popen(
        cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    (result, error) = process.communicate()
    rc = process.wait()
    if rc != 0:
        logger.error("%s failed: %s", cmd, error)
    return result
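
# Ask an Azure OpenAI chat-completions deployment to review a diff block. The deployment
# name "gpt-4o" and api-version are specific to the author's endpoint. On HTTP 429 the
# call sleeps for Retry-After seconds and retries exactly once.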
def code_review_by_gpt(diff_content, model="gpt-4o", ignore=False):
    url = (
        f"{__gpt_url__}/openai/deployments/gpt-4o/chat/completions?api-version=2024-02-01"
    )
    headers = {
        "Content-Type": "application/json",
        "api-key": gpt_key,
    }
    data = {
        "messages": [
            {
                "role": "system",
"content": 'You are an AI programming assistant.\nWhen asked for your name, you must respond with "GitHub Copilot".\nFollow the user\'s requirements carefully & to the letter.\nFollow Microsoft content policies.\nAvoid content that violates copyrights.\nIf you are asked to generate content that is harmful, hateful, racist, sexist, lewd, violent, or completely irrelevant to software engineering, only respond with "Sorry, I can\'t assist with that."\nKeep your answers short and impersonal.\nYou can answer general programming questions and perform the following tasks: \n* Ask a question about the files in your current workspace\n* Explain how the code in your active editor works\n* Review the selected code in your active editor\n* Generate unit tests for the selected code\n* Propose a fix for the problems in the selected code\n* Scaffold code for a new workspace\n* Create a new Jupyter Notebook\n* Find relevant code to your query\n* Propose a fix for the a test failure\n* Ask questions about VS Code\n* Generate query parameters for workspace search\n* Ask about VS Code extension development\n* Ask how to do something in the terminal\n* Explain what just happened in the terminal\nYou use the GPT-4 Turbo version of OpenAI\'s GPT models.\nFirst think step-by-step - describe your plan for what to build in pseudocode, written out in great detail.\nThen output the code in a single code block.\nMinimize any other prose.\nUse Markdown formatting in your answers.\nMake sure to include the programming language name at the start of the Markdown code blocks.\nAvoid wrapping the whole response in triple backticks.\nThe user works in an IDE called Visual Studio Code which has a concept for editors with open files, integrated unit test support, an output pane that shows the output of running the code as well as an integrated terminal.\nThe user is working on a Windows machine. Please respond with system specific commands if applicable.\nThe active document is the source code the user is looking at right now.\nYou can only give one reply for each conversation turn.\nRespond in the following locale: zh-cn', | |
            },
            {
                "role": "user",
                "content": f"{diff_content} Code review",
            },
        ],
        "model": model,
        "max_tokens": 4096,
        "temperature": 0.1,
        "top_p": 1,
        "n": 1,
        "stream": False,
    }
    data = json.dumps(data).encode()
    q = request.Request(url, headers=headers, data=data)
    try:
        res = request.urlopen(q)
        return json.load(res)["choices"][0]["message"]["content"]
    except HTTPError as e:
        if e.code == 429 and not ignore:
            print(e.headers)
            time.sleep(int(e.headers.get("Retry-After", 5)))
            return code_review_by_gpt(diff_content, ignore=True)
        # other HTTP errors (or a second 429) fall through and return None
        logger.error("code review request failed: %s", e)
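
# Offline alternative (currently unused): run the review prompt through a local
# llama-cli binary instead of the hosted GPT deployment.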
def code_review_by_localLLM(block):  # unused
    prompt = 'You are an AI programming assistant.\nWhen asked for your name, you must respond with "GitHub Copilot".\nFollow the user\'s requirements carefully & to the letter.\nFollow Microsoft content policies.\nAvoid content that violates copyrights.\nIf you are asked to generate content that is harmful, hateful, racist, sexist, lewd, violent, or completely irrelevant to software engineering, only respond with "Sorry, I can\'t assist with that."\nKeep your answers short and impersonal.\nYou can answer general programming questions and perform the following tasks: \n* Ask a question about the files in your current workspace\n* Explain how the code in your active editor works\n* Review the selected code in your active editor\n* Generate unit tests for the selected code\n* Propose a fix for the problems in the selected code\n* Scaffold code for a new workspace\n* Create a new Jupyter Notebook\n* Find relevant code to your query\n* Propose a fix for the a test failure\n* Ask questions about VS Code\n* Generate query parameters for workspace search\n* Ask about VS Code extension development\n* Ask how to do something in the terminal\n* Explain what just happened in the terminal\nYou use the GPT-4 Turbo version of OpenAI\'s GPT models.\nFirst think step-by-step - describe your plan for what to build in pseudocode, written out in great detail.\nThen output the code in a single code block.\nMinimize any other prose.\nUse Markdown formatting in your answers.\nMake sure to include the programming language name at the start of the Markdown code blocks.\nAvoid wrapping the whole response in triple backticks.\nThe user works in an IDE called Visual Studio Code which has a concept for editors with open files, integrated unit test support, an output pane that shows the output of running the code as well as an integrated terminal.\nThe user is working on a Windows machine. Please respond with system specific commands if applicable.\nThe active document is the source code the user is looking at right now.\nYou can only give one reply for each conversation turn.\nRespond in the following locale: zh-cn'
    # alternative local models:
    # ~/Public/models/gemma_2_chinese_chat_q4_k_m.gguf --temp 0.5 -p \\
    # ~/Public/models/codellama-7b.Q4_K_M.gguf --temp 0.5 -p \\
    # The Chinese prompt below reads: "As a professional programmer, please review the
    # following code changes and give suggestions and a summary of the logic:".
    cmd = f"""bash -c \" ~/bin/llama-cli --log-disable -m ~/Public/models/gemma_2_chinese_chat_q4_k_m.gguf --temp 0.5 -p \'作为一名专业的程序员,请对以下代码变更内容进行审查,并给出修改意见和逻辑总结: {block}\' \"
    """.encode("utf8")
    # logger.info(f'\r\r======\r{cmd}')
    return execute(cmd)
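
# Post a line comment on a commit through the GitLab "commits comments" API
# (form-encoded body, unlike the JSON helper above).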
def add_comment(url, note, path, line):
    # print(note)
    q = request.Request(
        __base_url__ + url,
        headers={"PRIVATE-TOKEN": gitlab_key},
        data=parse.urlencode(
            {
                "note": "Auto Code Review by GPT-4o: \n" + note,
                "path": path,
                "line": line,
                "line_type": "new",
            }
        ).encode(),
        method="POST",
    )
    return json.load(request.urlopen(q))
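
# Helper for manual testing (not called from main): log the diff of the most recent
# commit on a branch.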
def get_commits(pid, branch="master"):
    """
    >>> get_commits('tech-platform%2Fmagical', branch='llm-cr')
    """
    r = req(f"/projects/{pid}/repository/commits?ref_name={branch}")
    for x in r[0:1]:
        df = req(f'/projects/{pid}/repository/commits/{x["id"]}/diff')
        logger.info(df)
        if not df:
            logger.error("Failed to get diff content")
def start_review(pid, sha, dry_run=False):
    """
    >>> start_review('tech-platform%2Fmagical', '947153c5420d765c2d0a1e729c629d8981ecbd26', dry_run=0)
    """
    df = req(f"/projects/{pid}/repository/commits/{sha}/diff")
    if not df:
        logger.error("Failed to get diff content")
    for block in df:
        if pathlib.Path(block["new_path"]).suffix[1:] in __ignore_suffix__:
            logger.warning(f'\r\rIgnored file: {block["new_path"]}')
            continue
        # the whole diff block (a dict including the "diff" text) is serialised into the prompt
        note = code_review_by_gpt(block)
        logger.info(f'\r\r======\r {block["new_path"]} \r\r {note}')
        if dry_run:
            return
        if not note:
            continue
        cmt = add_comment(
            url=f"/projects/{pid}/repository/commits/{sha}/comments",
            note=note,
            path=block["new_path"],
            # take the old-file start line from the "@@ -start,count +start,count @@" hunk header
            line=int(block["diff"].split("@@")[1].split(",")[0].replace(" -", "")),
        )
        logger.info(f"\r\r post comment: \r\r{cmt}")
def main():
    project = os.getenv("CI_PROJECT_PATH")
    sha = os.getenv("CI_COMMIT_SHA")
    if project and sha:
        pid = parse.quote_plus(project)
        start_review(pid, sha)
__doc__ = """ | |
>>> execute('echo 123') | |
b'123\\n' | |
>>> pathlib.Path('a.jpg').suffix[1:] in __ignore_suffix__ | |
True | |
""" | |
if __name__ == "__main__": | |
main() |
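
# A possible local smoke test (the project path and commit SHA are placeholders, not from
# this gist): run GPT_KEY=xxx GITLAB_KEY=xxx python3 -i gitlab-ci.py and then call
# start_review("group%2Fproject", "<commit sha>", dry_run=True) to log a single review
# without posting any comment.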