Last active
January 15, 2025 23:06
-
-
Save VeckoTheGecko/e695f046503ea5bf3eff3ee8be9676b3 to your computer and use it in GitHub Desktop.
Convert GitHub search queries to markdown tables
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
CLI tool for converting GitHub search queries to markdown tables. | |
Motivation: | |
GitHub search syntax is powerful to search for specific sets of pull requests. | |
This tool allows you to easily extract information on pull requests and mentioned issues | |
in a markdown table with links to be included in a markdown file. | |
Look at the CLI_DESCRIPTION variable, or run `python github-to-md.py --help` for more info. | |
""" | |
import argparse | |
from dataclasses import dataclass | |
import requests | |
import re | |
import os | |
# Help text handed to argparse as the parser description. It is used with
# RawTextHelpFormatter, so the line breaks below are shown to the user as-is.
CLI_DESCRIPTION = """
Convert a GitHub search query for a repo to a markdown table. This is useful for
quickly exporting certain searches for future reference. Table is in the following format:
| PR | Author | Mentioned Issues |
| --- | ------ | ---------------- |
...
The PR field includes the title. All PR and issue mentions are links.
GITHUB_TOKEN environment variable must be set with a GitHub personal access token.
examples:
python github-to-md.py 'pydata/xarray' 'is:pull-request author:VeckoTheGecko'
"""
def get_github_token() -> str:
    """Return the GitHub personal access token stored in ``GITHUB_TOKEN``.

    Raises:
        KeyError: If the environment variable is not set. A short hint is
            printed before the exception is raised.
    """
    token = os.environ.get("GITHUB_TOKEN")
    if token is None:
        print("GITHUB_TOKEN environment variable not set.")
        raise KeyError("GITHUB_TOKEN")
    return token
def get_headers():
    """Build the authorization and accept headers for GitHub API calls.

    The token is read through ``get_github_token()``, so a missing
    ``GITHUB_TOKEN`` surfaces here as a ``KeyError``.
    """
    token = get_github_token()
    headers = {}
    headers["Authorization"] = f"token {token}"
    headers["Accept"] = "application/vnd.github.v3+json"
    return headers
def get_issue_link(repo: str, issue_number: str) -> str:
    """Return the github.com issue URL for ``issue_number`` in ``repo``.

    ``repo`` is expected in ``owner/name`` form and is inserted verbatim.
    """
    base_url = "https://github.com"
    return f"{base_url}/{repo}/issues/{issue_number}"
def get_pull_requests(
    query: str,
    repo: str,
    search_url="https://api.github.com/search/issues",
    n_pages: int = 1,
    pull_requests: list[dict] | None = None,
):
    """Fetch the items matching a GitHub search query, following pagination.

    Args:
        query: GitHub search qualifiers, e.g. ``"is:pr author:someone"``.
        repo: Repository in ``owner/name`` form; added as a ``repo:`` qualifier.
        search_url: URL of the first page to request.
        n_pages: Maximum number of result pages to fetch. Values below 1
            fetch nothing.
        pull_requests: Optional list that is extended in place with the
            results; a new list is created when omitted.

    Returns:
        The list of result items (the dicts found under ``items`` in each
        response) accumulated over every fetched page. If a request fails,
        the error is printed and the items collected so far are returned.

    Raises:
        KeyError: If ``GITHUB_TOKEN`` is not set (raised while building headers).
    """
    if pull_requests is None:
        pull_requests = []

    headers = get_headers()
    # The query is only attached to the first request: the "next" URL taken
    # from the Link header is followed as-is, and re-sending ``q`` would
    # append a duplicate parameter to it.
    params = {"q": f"{query} repo:{repo}"}
    url = search_url

    for _ in range(n_pages):
        try:
            # A timeout keeps a stalled connection from hanging the CLI forever.
            response = requests.get(url, headers=headers, params=params, timeout=30)
        except requests.RequestException as e:
            print(f"An error occurred: {e}")
            break

        try:
            payload = response.json()
        except ValueError:
            # Non-JSON body (e.g. a proxy error page): treat it as empty.
            payload = {}

        if response.status_code != 200:
            print(
                f"Error: {response.status_code} - {payload.get('message', 'Unknown error')}"
            )
            break

        pull_requests.extend(payload.get("items", []))

        url = response.links.get("next", {}).get("url")
        if url is None:
            break
        params = None

    return pull_requests
@dataclass
class PullRequestInfo:
    """The pieces of one pull request needed to render a markdown table row."""

    title: str  # PR title, unescaped
    number: str  # PR number, kept as a string
    author: str  # login of the PR author (rendered with a leading "@")
    mentioned_issues: list[str]  # issue numbers, as strings, to link in the last column

    @staticmethod
    def get_md_header() -> str:
        """Return the two header lines (column names and separator) of the table."""
        return "| PR | Author | Mentioned Issues |\n| --- | --- | --- |\n"

    @staticmethod
    def _escape_cell(text: str) -> str:
        """Escape ``|`` so ``text`` cannot terminate a markdown table cell early."""
        return text.replace("|", "\\|")

    def to_md_row(self, repo: str):
        """Return this pull request as one markdown table row, newline-terminated.

        Args:
            repo: Repository in ``owner/name`` form, used to build the links.
        """
        return f"| {self.linked_number_title(repo)} | @{self.author} | {self.mentioned_issues_links(repo)} |\n"

    @property
    def number_title(self):
        """Plain-text label ``"[<number>] <title>"`` (no markdown escaping)."""
        return f"[{self.number}] {self.title}"

    def linked_number_title(self, repo: str):
        """Return the number/title label as a markdown link to the PR.

        Pipes in the title are escaped because the result is placed inside a
        table cell, where a bare ``|`` would split the row.
        """
        label = self._escape_cell(self.number_title)
        return f"[{label}]({get_issue_link(repo, self.number)})"

    def mentioned_issues_links(self, repo: str) -> str:
        """Return the mentioned issues as comma-separated markdown links."""
        return ", ".join(
            f"[{issue}]({get_issue_link(repo, issue)})"
            for issue in self.mentioned_issues
        )
def extract_pr_info(pr: dict) -> PullRequestInfo:
    """Build a ``PullRequestInfo`` from one item of a GitHub search response.

    Args:
        pr: Item dict. ``title`` and ``number`` are required; ``body`` and
            ``user`` may be missing or ``None``.

    Returns:
        A ``PullRequestInfo`` whose ``mentioned_issues`` holds every distinct
        ``#<digits>`` number found in the body, in order of first appearance.

    Raises:
        KeyError: If ``title`` or ``number`` is missing from ``pr``.
    """
    # A PR without a description has a null body; treat it as empty text.
    body = pr.get("body") or ""

    # NOTE: this matches any "#<digits>" in the text, so references written as
    # "owner/repo#12" are picked up too and will later be linked against the
    # repository being searched.
    numbers = re.findall(r"#(\d+)", body)
    # dict.fromkeys drops duplicates while keeping first-seen order.
    mentioned_issues: list[str] = list(dict.fromkeys(numbers))

    # The author can be null (e.g. a deleted account); fall back to "ghost",
    # the placeholder login GitHub displays for deleted users.
    user = pr.get("user") or {}

    return PullRequestInfo(
        title=pr["title"],
        number=str(pr["number"]),
        author=user.get("login", "ghost"),
        mentioned_issues=mentioned_issues,
    )
def search_query_to_markdown(repo: str, search_query: str) -> str:
    """Render the search results for ``search_query`` in ``repo`` as a markdown table.

    Args:
        repo: Repository in ``owner/name`` form.
        search_query: GitHub search qualifiers.

    Returns:
        The table header followed by one row per result item.
    """
    raw_items = get_pull_requests(search_query, repo, n_pages=1000)
    rows = [extract_pr_info(item).to_md_row(repo) for item in raw_items]
    return PullRequestInfo.get_md_header() + "".join(rows)
def main():
    """Parse the command line and print the markdown table to stdout."""
    parser = argparse.ArgumentParser(
        description=CLI_DESCRIPTION,
        formatter_class=argparse.RawTextHelpFormatter,
    )
    positionals = (
        ("repo", "Owner and repository name in the format owner/repo"),
        ("search_query", "GitHub search query"),
    )
    for arg_name, help_text in positionals:
        parser.add_argument(arg_name, type=str, help=help_text)

    namespace = parser.parse_args()
    table = search_query_to_markdown(namespace.repo, namespace.search_query)
    print(table)
if __name__ == "__main__":
    # Script entry point. The commented-out line below is the programmatic
    # equivalent of a CLI call, kept as a usage example:
    # out = search_query_to_markdown(repo="pydata/xarray", search_query="is:pr reviewed-by:dcherian")
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment