VeckoTheGecko · January 15, 2025 23:06
diff --git a/github-to-md.py b/github-to-md.py
 """
 CLI tool for converting GitHub search queries to markdown tables.

 Motivation:
    GitHub search syntax is a powerful way to find specific sets of pull requests.
    This tool extracts information on those pull requests and the issues they mention
    into a markdown table, with links, ready to be included in a markdown file.

 Look at the CLI_DESCRIPTION variable, or run `python github-to-md.py --help` for more info.
 """


 import argparse
 from dataclasses import dataclass
 import requests
 import re
 import os

 # Help text for `--help`; main() renders it verbatim via argparse.RawTextHelpFormatter,
 # so the line breaks and the table layout below are shown exactly as written.
 CLI_DESCRIPTION = """
 Convert a GitHub search query for a repo to a markdown table. This is useful for
 quickly exporting certain searches for future reference. Table is in the following format:

 | PR  | Author | Mentioned Issues |
 | --- | ------ | ---------------- |
 ...

 The PR field includes the title. All PR and issue mentions are links.

 GITHUB_TOKEN environment variable must be set with a GitHub personal access token.
 examples:
    python github-to-md.py 'pydata/xarray' 'is:pull-request author:VeckoTheGecko'
 """



 def get_github_token() -> str:
     """Return the GitHub personal access token from the environment.

     Returns:
         The value of the ``GITHUB_TOKEN`` environment variable.

     Raises:
         KeyError: If ``GITHUB_TOKEN`` is not set; a short notice is printed
             before the error propagates.
     """
     try:
         token = os.environ["GITHUB_TOKEN"]
     except KeyError:
         print("GITHUB_TOKEN environment variable not set.")
         raise
     return token


 def get_headers() -> dict[str, str]:
     """Build the HTTP headers sent with each GitHub API request.

     Returns:
         A dict with the ``Authorization`` header (carrying the token from
         ``get_github_token``) and the ``Accept`` header.

     Raises:
         KeyError: Propagated from ``get_github_token`` when no token is set.
     """
     token = get_github_token()
     headers = {
         "Authorization": f"token {token}",
         "Accept": "application/vnd.github.v3+json",
     }
     return headers


 def get_issue_link(repo: str, issue_number: str) -> str:
     """Return the github.com issue URL for ``issue_number`` in ``repo``.

     Args:
         repo: Repository in ``owner/repo`` form.
         issue_number: Number of the issue (this file also passes PR numbers).

     Returns:
         The URL ``https://github.com/{repo}/issues/{issue_number}``.
     """
     url_template = "https://github.com/{}/issues/{}"
     return url_template.format(repo, issue_number)


 def get_pull_requests(
     query: str,
     repo: str,
     search_url="https://api.github.com/search/issues",
     n_pages: int = 1,
     pull_requests: list[dict] | None = None,
 ) -> list[dict]:
     """Collect the items matching ``query`` in ``repo`` from the GitHub search API.

     Pages are followed through the ``next`` link of each response until there
     is no further page, ``n_pages`` pages have been read, or a request fails.
     Failures are reported with ``print`` and end the pagination; whatever was
     collected up to that point is still returned.

     Args:
         query: GitHub search query, without the ``repo:`` qualifier.
         repo: Repository in ``owner/repo`` form; appended as ``repo:{repo}``.
         search_url: URL of the first request.
         n_pages: Maximum number of pages to fetch. At least one page is
             always requested, even for values below 1.
         pull_requests: Optional list to extend in place with the results; a
             new list is created when omitted.

     Returns:
         The list holding every item collected across the fetched pages.

     Raises:
         KeyError: Propagated from ``get_headers`` when no token is set.
     """
     if pull_requests is None:
         pull_requests = []

     headers = get_headers()
     # Only the first request carries the query explicitly; the "next" links
     # are followed as-is so the query parameter is not appended a second time.
     params = {"q": f"{query} repo:{repo}"}
     url = search_url

     for _ in range(max(n_pages, 1)):
         try:
             # A timeout keeps a stalled connection from hanging the CLI forever.
             response = requests.get(url, headers=headers, params=params, timeout=30)
             payload = response.json()
         except (requests.RequestException, ValueError) as e:
             # ValueError covers a response body that is not valid JSON.
             print(f"An error occurred: {e}")
             break

         if response.status_code != 200:
             print(
                 f"Error: {response.status_code} - {payload.get('message', 'Unknown error')}"
             )
             break

         pull_requests.extend(payload.get("items", []))

         url = response.links.get("next", {}).get("url")
         if url is None:
             break
         params = None

     return pull_requests


 @dataclass
 class PullRequestInfo:
     """The pieces of one pull request needed to render a markdown table row."""

     title: str
     number: str  # PR number, kept as a string
     author: str  # login of the PR author
     mentioned_issues: list[str]  # issue numbers (without "#") found in the PR body

     @staticmethod
     def get_md_header() -> str:
         """Return the two header lines (titles and separator) of the table."""
         return "| PR | Author | Mentioned Issues |\n| --- | --- | --- |\n"

     @staticmethod
     def _escape_md_cell(text: str) -> str:
         """Escape pipes so ``text`` cannot be split into several table cells."""
         return text.replace("|", "\\|")

     def to_md_row(self, repo: str) -> str:
         """Return this pull request as one newline-terminated table row.

         Args:
             repo: Repository in ``owner/repo`` form, used to build the links.
         """
         # The title is free text; an unescaped "|" in it would break the row.
         pr_cell = self._escape_md_cell(self.linked_number_title(repo))
         return f"| {pr_cell} |  @{self.author} | {self.mentioned_issues_links(repo)} |\n"

     @property
     def number_title(self) -> str:
         """The label ``[number] title`` used as the link text of the PR."""
         return f"[{self.number}] {self.title}"

     def linked_number_title(self, repo: str) -> str:
         """Return ``number_title`` as a markdown link to the PR in ``repo``."""
         return f"[{self.number_title}]({get_issue_link(repo, self.number)})"

     def mentioned_issues_links(self, repo: str) -> str:
         """Return the mentioned issues as comma-separated markdown links."""
         return ", ".join(
             f"[{issue}]({get_issue_link(repo, issue)})"
             for issue in self.mentioned_issues
         )


 def extract_pr_info(pr: dict) -> PullRequestInfo:
     """Turn one item returned by ``get_pull_requests`` into a ``PullRequestInfo``.

     Every ``#<digits>`` occurrence in the body is treated as a mentioned
     issue; repeats are dropped and first-seen order is kept.

     Args:
         pr: Mapping with the keys ``body`` (may be ``None``), ``title``,
             ``number`` and ``user`` (itself holding ``login``).

     Returns:
         The extracted information, with the PR number converted to ``str``.
     """
     body = pr["body"]
     references = re.findall(r"#\d+", body) if body is not None else []

     # dict.fromkeys removes duplicates while preserving insertion order.
     unique_numbers = list(dict.fromkeys(ref.strip("#") for ref in references))

     return PullRequestInfo(
         title=pr["title"],
         number=str(pr["number"]),
         author=pr["user"]["login"],
         mentioned_issues=unique_numbers,
     )


 def search_query_to_markdown(repo: str, search_query: str) -> str:
     """Run ``search_query`` against ``repo`` and render the hits as a markdown table.

     Args:
         repo: Repository in ``owner/repo`` form.
         search_query: GitHub search query, without the ``repo:`` qualifier.

     Returns:
         The table header followed by one row per item returned by
         ``get_pull_requests``.
     """
     raw_items = get_pull_requests(search_query, repo, n_pages=1000)
     rows = [extract_pr_info(item).to_md_row(repo) for item in raw_items]
     return PullRequestInfo.get_md_header() + "".join(rows)


 def main() -> None:
     """Parse the command line, run the search and print the markdown table.

     Expects two positional arguments: the repository (``owner/repo``) and the
     GitHub search query.
     """
     cli = argparse.ArgumentParser(
         formatter_class=argparse.RawTextHelpFormatter,
         description=CLI_DESCRIPTION,
     )
     cli.add_argument(
         "repo",
         help="Owner and repository name in the format owner/repo",
         type=str,
     )
     cli.add_argument("search_query", help="GitHub search query", type=str)
     options = cli.parse_args()

     print(search_query_to_markdown(options.repo, options.search_query))


 # Script entry point: the CLI runs only when this file is executed directly.
 if __name__ == "__main__":
    # For ad-hoc use without the CLI, call search_query_to_markdown directly, e.g.
    # search_query_to_markdown(repo="pydata/xarray", search_query="is:pr reviewed-by:dcherian")
    main()
	"""
	CLI tool for converting GitHub search queries to markdown tables.

	Motivation:
	GitHub search syntax is a powerful way to find specific sets of pull requests.
	This tool extracts information on those pull requests and the issues they mention
	into a markdown table, with links, ready to be included in a markdown file.

	Look at the CLI_DESCRIPTION variable, or run `python github-to-md.py --help` for more info.
	"""


	import argparse
	from dataclasses import dataclass
	import requests
	import re
	import os

	# Help text for `--help`; main() renders it verbatim via argparse.RawTextHelpFormatter,
	# so the line breaks and the table layout below are shown exactly as written.
	CLI_DESCRIPTION = """
	Convert a GitHub search query for a repo to a markdown table. This is useful for
	quickly exporting certain searches for future reference. Table is in the following format:

	| PR  | Author | Mentioned Issues |
	| --- | ------ | ---------------- |
	...

	The PR field includes the title. All PR and issue mentions are links.

	GITHUB_TOKEN environment variable must be set with a GitHub personal access token.
	examples:
	    python github-to-md.py 'pydata/xarray' 'is:pull-request author:VeckoTheGecko'
	"""



	def get_github_token() -> str:
	    """Return the GitHub personal access token from the environment.

	    Returns:
	        The value of the ``GITHUB_TOKEN`` environment variable.

	    Raises:
	        KeyError: If ``GITHUB_TOKEN`` is not set; a short notice is printed
	            before the error propagates.
	    """
	    try:
	        token = os.environ["GITHUB_TOKEN"]
	    except KeyError:
	        print("GITHUB_TOKEN environment variable not set.")
	        raise
	    return token


	def get_headers() -> dict[str, str]:
	    """Build the HTTP headers sent with each GitHub API request.

	    Returns:
	        A dict with the ``Authorization`` header (carrying the token from
	        ``get_github_token``) and the ``Accept`` header.

	    Raises:
	        KeyError: Propagated from ``get_github_token`` when no token is set.
	    """
	    token = get_github_token()
	    headers = {
	        "Authorization": f"token {token}",
	        "Accept": "application/vnd.github.v3+json",
	    }
	    return headers


	def get_issue_link(repo: str, issue_number: str) -> str:
	    """Return the github.com issue URL for ``issue_number`` in ``repo``.

	    Args:
	        repo: Repository in ``owner/repo`` form.
	        issue_number: Number of the issue (this file also passes PR numbers).

	    Returns:
	        The URL ``https://github.com/{repo}/issues/{issue_number}``.
	    """
	    return f"https://github.com/{repo}/issues/{issue_number}"


	def get_pull_requests(
	    query: str,
	    repo: str,
	    search_url="https://api.github.com/search/issues",
	    n_pages: int = 1,
	    pull_requests: list[dict] | None = None,
	) -> list[dict]:
	    """Collect the items matching ``query`` in ``repo`` from the GitHub search API.

	    Pages are followed through the ``next`` link of each response until there
	    is no further page, ``n_pages`` pages have been read, or a request fails.
	    Failures are reported with ``print`` and end the pagination; whatever was
	    collected up to that point is still returned.

	    Args:
	        query: GitHub search query, without the ``repo:`` qualifier.
	        repo: Repository in ``owner/repo`` form; appended as ``repo:{repo}``.
	        search_url: URL of the first request.
	        n_pages: Maximum number of pages to fetch. At least one page is
	            always requested, even for values below 1.
	        pull_requests: Optional list to extend in place with the results; a
	            new list is created when omitted.

	    Returns:
	        The list holding every item collected across the fetched pages.

	    Raises:
	        KeyError: Propagated from ``get_headers`` when no token is set.
	    """
	    if pull_requests is None:
	        pull_requests = []

	    headers = get_headers()
	    # Only the first request carries the query explicitly; the "next" links
	    # are followed as-is so the query parameter is not appended a second time.
	    params = {"q": f"{query} repo:{repo}"}
	    url = search_url

	    for _ in range(max(n_pages, 1)):
	        try:
	            # A timeout keeps a stalled connection from hanging the CLI forever.
	            response = requests.get(url, headers=headers, params=params, timeout=30)
	            payload = response.json()
	        except (requests.RequestException, ValueError) as e:
	            # ValueError covers a response body that is not valid JSON.
	            print(f"An error occurred: {e}")
	            break

	        if response.status_code != 200:
	            print(
	                f"Error: {response.status_code} - {payload.get('message', 'Unknown error')}"
	            )
	            break

	        pull_requests.extend(payload.get("items", []))

	        url = response.links.get("next", {}).get("url")
	        if url is None:
	            break
	        params = None

	    return pull_requests


	@dataclass
	class PullRequestInfo:
	    """The pieces of one pull request needed to render a markdown table row."""

	    title: str
	    number: str  # PR number, kept as a string
	    author: str  # login of the PR author
	    mentioned_issues: list[str]  # issue numbers (without "#") found in the PR body

	    @staticmethod
	    def get_md_header() -> str:
	        """Return the two header lines (titles and separator) of the table."""
	        return "| PR | Author | Mentioned Issues |\n| --- | --- | --- |\n"

	    @staticmethod
	    def _escape_md_cell(text: str) -> str:
	        """Escape pipes so ``text`` cannot be split into several table cells."""
	        return text.replace("|", "\\|")

	    def to_md_row(self, repo: str) -> str:
	        """Return this pull request as one newline-terminated table row.

	        Args:
	            repo: Repository in ``owner/repo`` form, used to build the links.
	        """
	        # The title is free text; an unescaped "|" in it would break the row.
	        pr_cell = self._escape_md_cell(self.linked_number_title(repo))
	        return f"| {pr_cell} |  @{self.author} | {self.mentioned_issues_links(repo)} |\n"

	    @property
	    def number_title(self) -> str:
	        """The label ``[number] title`` used as the link text of the PR."""
	        return f"[{self.number}] {self.title}"

	    def linked_number_title(self, repo: str) -> str:
	        """Return ``number_title`` as a markdown link to the PR in ``repo``."""
	        return f"[{self.number_title}]({get_issue_link(repo, self.number)})"

	    def mentioned_issues_links(self, repo: str) -> str:
	        """Return the mentioned issues as comma-separated markdown links."""
	        return ", ".join(
	            f"[{issue}]({get_issue_link(repo, issue)})"
	            for issue in self.mentioned_issues
	        )


	def extract_pr_info(pr: dict) -> PullRequestInfo:
	    """Turn one item returned by ``get_pull_requests`` into a ``PullRequestInfo``.

	    Every ``#<digits>`` occurrence in the body is treated as a mentioned
	    issue; repeats are dropped and first-seen order is kept.

	    Args:
	        pr: Mapping with the keys ``body`` (may be ``None``), ``title``,
	            ``number`` and ``user`` (itself holding ``login``).

	    Returns:
	        The extracted information, with the PR number converted to ``str``.
	    """
	    body = pr["body"]
	    references = re.findall(r"#\d+", body) if body is not None else []

	    # dict.fromkeys removes duplicates while preserving insertion order.
	    unique_numbers = list(dict.fromkeys(ref.strip("#") for ref in references))

	    return PullRequestInfo(
	        title=pr["title"],
	        number=str(pr["number"]),
	        author=pr["user"]["login"],
	        mentioned_issues=unique_numbers,
	    )


	def search_query_to_markdown(repo: str, search_query: str) -> str:
	    """Run ``search_query`` against ``repo`` and render the hits as a markdown table.

	    Args:
	        repo: Repository in ``owner/repo`` form.
	        search_query: GitHub search query, without the ``repo:`` qualifier.

	    Returns:
	        The table header followed by one row per item returned by
	        ``get_pull_requests``.
	    """
	    raw_items = get_pull_requests(search_query, repo, n_pages=1000)
	    rows = [extract_pr_info(item).to_md_row(repo) for item in raw_items]
	    return PullRequestInfo.get_md_header() + "".join(rows)


	def main() -> None:
	    """Parse the command line, run the search and print the markdown table.

	    Expects two positional arguments: the repository (``owner/repo``) and the
	    GitHub search query.
	    """
	    parser = argparse.ArgumentParser(
	        description=CLI_DESCRIPTION, formatter_class=argparse.RawTextHelpFormatter
	    )
	    parser.add_argument(
	        "repo", type=str, help="Owner and repository name in the format owner/repo"
	    )
	    parser.add_argument("search_query", type=str, help="GitHub search query")
	    args = parser.parse_args()

	    print(search_query_to_markdown(args.repo, args.search_query))


	# Script entry point: the CLI runs only when this file is executed directly.
	if __name__ == "__main__":
	    # For ad-hoc use without the CLI, call search_query_to_markdown directly, e.g.
	    # search_query_to_markdown(repo="pydata/xarray", search_query="is:pr reviewed-by:dcherian")
	    main()