Skip to content

Instantly share code, notes, and snippets.

@VeckoTheGecko
Last active January 15, 2025 23:06
Show Gist options
  • Save VeckoTheGecko/e695f046503ea5bf3eff3ee8be9676b3 to your computer and use it in GitHub Desktop.
Save VeckoTheGecko/e695f046503ea5bf3eff3ee8be9676b3 to your computer and use it in GitHub Desktop.
Convert GitHub search queries to markdown tables
"""
CLI tool for converting GitHub search queries to markdown tables.
Motivation:
GitHub search syntax is powerful to search for specific sets of pull requests.
This tool allows you to easily extract information on pull requests and mentioned issues
in a markdown table with links to be included in a markdown file.
Look at the CLI_DESCRIPTION variable, or run `python github-to-md.py --help` for more info.
"""
import argparse
from dataclasses import dataclass
import requests
import re
import os
# Help text passed to argparse as the CLI description (see ``main``).
# Fixed the duplicated phrase "a GitHub a github" in the first sentence.
CLI_DESCRIPTION = """
Convert a GitHub search query for a repo to a markdown table. This is useful for
quickly exporting certain searches for future reference. Table is in the following format:
| PR | Author | Mentioned Issues |
| --- | ------ | ---------------- |
...
The PR field includes the title. All PR and issue mentions are links.
GITHUB_TOKEN environment variable must be set with a GitHub personal access token.
examples:
python github-to-md.py 'pydata/xarray' 'is:pull-request author:VeckoTheGecko'
"""
def get_github_token() -> str:
    """Return the GitHub personal access token read from ``GITHUB_TOKEN``.

    Raises:
        KeyError: If the ``GITHUB_TOKEN`` environment variable is not set.
            A short notice is printed before the error is re-raised.
    """
    try:
        token = os.environ["GITHUB_TOKEN"]
    except KeyError as missing:
        print("GITHUB_TOKEN environment variable not set.")
        raise missing
    else:
        return token
def get_headers():
    """Build the HTTP headers sent with each GitHub API request.

    Returns:
        A dict with an ``Authorization`` header carrying the token from
        ``get_github_token()`` and an ``Accept`` header selecting the v3 JSON
        media type.
    """
    token = get_github_token()
    headers = {}
    headers["Authorization"] = f"token {token}"
    headers["Accept"] = "application/vnd.github.v3+json"
    return headers
def get_issue_link(repo: str, issue_number: str) -> str:
    """Return the github.com URL of issue ``issue_number`` in ``repo``.

    Args:
        repo: Repository in ``owner/repo`` form.
        issue_number: Issue (or pull request) number.
    """
    template = "https://github.com/{repo}/issues/{issue_number}"
    return template.format(repo=repo, issue_number=issue_number)
def get_pull_requests(
    query: str,
    repo: str,
    search_url="https://api.github.com/search/issues",
    n_pages: int = 1,
    pull_requests: list[dict] | None = None,
):
    """Collect the search results for ``query`` restricted to ``repo``.

    Pages are fetched one after another by following the ``next`` link that
    the API returns, until ``n_pages`` pages have been read, there is no next
    page, or a request fails. Failures are reported with ``print`` and end the
    pagination; whatever was collected so far is still returned.

    Args:
        query: GitHub search query (without the ``repo:`` qualifier).
        repo: Repository in ``owner/repo`` form; appended as ``repo:<repo>``.
        search_url: URL of the first request.
        n_pages: Maximum number of pages to fetch. Values <= 0 fetch nothing.
        pull_requests: Optional list to which results are appended (and which
            is returned); a new list is created when omitted.

    Returns:
        The list of result items (dicts taken from the ``items`` key of each
        response) accumulated over all fetched pages.

    Raises:
        KeyError: Propagated from ``get_headers()`` when no token is configured.
    """
    if pull_requests is None:
        pull_requests = []

    headers = get_headers()
    # Only the first request carries the query: the "next" URLs taken from the
    # Link header already embed it, so re-sending it would duplicate ``q``.
    params = {"q": f"{query} repo:{repo}"}
    page_url = search_url

    for _ in range(n_pages):
        try:
            # A timeout keeps the CLI from hanging forever on a stalled connection.
            response = requests.get(
                page_url, headers=headers, params=params, timeout=30
            )
        except requests.RequestException as e:
            print(f"An error occurred: {e}")
            break

        try:
            payload = response.json()
        except ValueError:
            # Body was not valid JSON; fall back to the default messages below.
            payload = {}

        if response.status_code != 200:
            print(
                f"Error: {response.status_code} - {payload.get('message', 'Unknown error')}"
            )
            break

        pull_requests.extend(payload.get("items", []))

        page_url = response.links.get("next", {}).get("url")
        if page_url is None:
            break
        params = None

    # Always return the accumulated results, never just the last page.
    return pull_requests
@dataclass
class PullRequestInfo:
    """The fields of one pull request needed to render a markdown table row."""

    title: str
    number: str
    author: str
    # Issue numbers (as strings, without the leading "#") mentioned by the PR.
    mentioned_issues: list[str]

    @staticmethod
    def get_md_header() -> str:
        """Return the table header row followed by the separator row."""
        return "| PR | Author | Mentioned Issues |\n| --- | --- | --- |\n"

    @staticmethod
    def _escape_table_cell(text: str) -> str:
        """Escape ``|`` so ``text`` cannot terminate a markdown table cell."""
        return text.replace("|", "\\|")

    def to_md_row(self, repo: str) -> str:
        """Return this pull request as one newline-terminated table row.

        Args:
            repo: Repository in ``owner/repo`` form, used to build the links.
        """
        # PR titles are free text; an unescaped pipe would split the cell and
        # shift every following column of the row.
        pr_cell = self._escape_table_cell(self.linked_number_title(repo))
        return f"| {pr_cell} | @{self.author} | {self.mentioned_issues_links(repo)} |\n"

    @property
    def number_title(self) -> str:
        """The PR number in square brackets followed by the title."""
        return f"[{self.number}] {self.title}"

    def linked_number_title(self, repo: str) -> str:
        """Return ``number_title`` as a markdown link to the PR in ``repo``."""
        return f"[{self.number_title}]({get_issue_link(repo, self.number)})"

    def mentioned_issues_links(self, repo: str) -> str:
        """Return the mentioned issues as comma-separated markdown links."""
        return ", ".join(
            f"[{issue}]({get_issue_link(repo, issue)})"
            for issue in self.mentioned_issues
        )
def extract_pr_info(pr: dict) -> PullRequestInfo:
    """Build a ``PullRequestInfo`` from one search-result item.

    Issue mentions are taken from ``#<number>`` references in the PR body,
    de-duplicated while keeping the order of first appearance.

    Args:
        pr: Result item; ``title``, ``number`` and ``user.login`` are read,
            plus ``body`` when present (it may be missing or ``None``).

    Returns:
        The extracted pull request information.
    """
    body = pr.get("body") or ""
    # The lookbehind skips "#<n>" that is glued to a preceding word character,
    # "/" or "&": cross-repository references such as "owner/repo#12" would
    # otherwise be linked to the wrong repository, and HTML entities such as
    # "&#39;" are not issue references at all.
    numbers = re.findall(r"(?<![\w/&])#(\d+)", body)
    # dict.fromkeys drops duplicates while preserving first-seen order.
    mentioned_issues: list[str] = list(dict.fromkeys(numbers))
    return PullRequestInfo(
        title=pr["title"],
        number=str(pr["number"]),
        author=pr["user"]["login"],
        mentioned_issues=mentioned_issues,
    )
def search_query_to_markdown(repo: str, search_query: str) -> str:
    """Run ``search_query`` against ``repo`` and render the hits as a table.

    Args:
        repo: Repository in ``owner/repo`` form.
        search_query: GitHub search query.

    Returns:
        The markdown table: header rows followed by one row per result.
    """
    raw_items = get_pull_requests(search_query, repo, n_pages=1000)
    infos = [extract_pr_info(item) for item in raw_items]
    rows = [info.to_md_row(repo) for info in infos]
    return PullRequestInfo.get_md_header() + "".join(rows)
def main():
    """Parse the command line, run the search and print the markdown table."""
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        description=CLI_DESCRIPTION,
    )
    cli.add_argument(
        "repo",
        type=str,
        help="Owner and repository name in the format owner/repo",
    )
    cli.add_argument(
        "search_query",
        type=str,
        help="GitHub search query",
    )
    parsed = cli.parse_args()
    print(search_query_to_markdown(parsed.repo, parsed.search_query))


if __name__ == "__main__":
    # The converter can also be called directly instead of through the CLI, e.g.
    # search_query_to_markdown(repo="pydata/xarray", search_query="is:pr reviewed-by:dcherian")
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment