Created
March 19, 2021 22:59
-
-
Save jdurgin/03104314cd4953ffb38d412f9e9e894d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| # Originally modified from A. Israel's script seen at | |
| # https://gist.github.com/aisrael/b2b78d9dfdd176a232b9 | |
| """To run this script first install the dependencies | |
| virtualenv v | |
| source v/bin/activate | |
| pip install githubpy GitPython requests | |
| Generate a github access token; this is needed as the anonymous access | |
| to Github's API will easily hit the limit even with a single invocation. | |
| For details see: | |
| https://help.github.com/articles/creating-an-access-token-for-command-line-use/ | |
| Next either set the github token as an env variable | |
| `GITHUB_ACCESS_TOKEN` or alternatively invoke the script with | |
| `--token` switch. | |
| Example: | |
| ceph-release-notes -r tags/v0.87..origin/giant \ | |
| $(git rev-parse --show-toplevel) | |
| """ | |
| from __future__ import print_function | |
| import argparse | |
| import github | |
| import os | |
| import re | |
| import sys | |
| import requests | |
| import time | |
| from git import Repo | |
# "Fixes: #1234" / "Fixes #1234" references; group(1) is the issue number.
fixes_re = re.compile(r"Fixes\:? #(\d+)")
# Lines starting with "Rev...By" (e.g. "Reviewed-by: ..."), case-insensitive.
reviewed_by_re = re.compile(r"Rev(.*)By", re.IGNORECASE)

# labels is the list of relevant labels defined for github.com/ceph/ceph
labels = {'bluestore', 'build/ops', 'cephfs', 'common', 'core', 'mgr',
          'mon', 'performance', 'pybind', 'rdma', 'rgw', 'rbd', 'tests',
          'tools'}

# Summary line of a merge commit; group(2) is the pull request number.
merge_re = re.compile(r"Merge (pull request|PR) #(\d+).*")

# prefixes is the list of commit description prefixes we recognize
prefixes = ['bluestore', 'build/ops', 'cephfs', 'cephx', 'cli', 'cmake',
            'common', 'core', 'crush', 'doc', 'fs', 'librados', 'librbd',
            'log', 'mds', 'mgr', 'mon', 'msg', 'objecter', 'osd', 'pybind',
            'rbd', 'rbd-mirror', 'rbd-nbd', 'rgw', 'tests', 'tools']

# "Signed-off-by: Name <" trailer; group(1) is the name.
signed_off_re = re.compile(r"Signed-off-by: (.+) <")

# Links to tracker issues; group(1) is the issue number.  Both http and
# https links are accepted, and the dots are escaped so that only the real
# host name matches.
tracker_re = re.compile(r"https?://tracker\.ceph\.com/issues/(\d+)")

# An alphanumeric followed by "_" and a non-word character, which
# reStructuredText would read as a link reference.
rst_link_re = re.compile(r"([a-zA-Z0-9])_(\W)")

# JSON endpoint of a tracker issue; {0} is the issue number.
tracker_uri = "http://tracker.ceph.com/issues/{0}.json"
def get_original_issue(issue, verbose):
    """Resolve a Backport tracker issue to the issue it was copied from.

    The issue is fetched from ``tracker_uri`` together with its relations.

    :param issue: issue number as a string (it is concatenated into URLs)
    :param verbose: when true, print why the issue was or was not resolved
    :returns: the ``issue_id`` of the single ``copied_to`` relation, as a
        string.  ``issue`` is returned unchanged when the issue is not in
        the "Backport" tracker, has no relations, has no ``copied_to``
        relation, or has more than one of them.
    """
    base_url = "http://tracker.ceph.com/issues/"
    reply = requests.get(
        tracker_uri.format(issue), params={"include": "relations"}
    )
    record = reply.json()["issue"]

    # looking up the original issue only makes sense for issues that
    # live in the Backport tracker
    tracker_name = record["tracker"]["name"]
    if tracker_name != "Backport":
        if verbose:
            print(base_url + issue +
                  " is from the tracker " + tracker_name +
                  ", do not look for the original issue")
        return issue

    # a Backport issue without any relation is kept as it is
    if "relations" not in record:
        if verbose:
            print(base_url + issue +
                  " has no relations, do not look for the original issue")
        return issue

    originals = []
    for relation in record["relations"]:
        if relation["relation_type"] == "copied_to":
            originals.append(str(relation['issue_id']))

    if not originals:
        if verbose:
            print(base_url + issue +
                  " has no copied_to relations; do not look for the" +
                  " original issue")
        return issue

    if len(originals) > 1:
        # ambiguous: more than one candidate, keep the backport issue
        if verbose:
            print("ERROR: http://tracker.ceph.com/issues/" + issue +
                  " has more than one Copied To relation")
        return issue

    if verbose:
        print(base_url + issue +
              " is the backport of http://tracker.ceph.com/issues/" +
              originals[0])
    return originals[0]
def split_component(title, gh, number):
    """Make sure a PR title starts with a component prefix.

    :param title: the pull request title
    :param gh: GitHub API client, used to look up the labels of the PR
    :param number: the pull request number
    :returns: the matched part of ``title`` when it already starts with
        one of ``prefixes`` followed by a colon.  Otherwise the title is
        prefixed with ``doc: `` (PR labelled ``documentation``), with the
        sorted, comma-joined labels that are also known prefixes, or with
        ``UNKNOWN: `` when no label helps.
    """
    pattern = '(' + '|'.join(prefixes) + ')(:.*)'
    found = re.match(pattern, title)
    if found:
        return found.group(1) + found.group(2)

    # no recognised prefix in the title: guess it from the github labels
    issue = gh.repos("ceph")("ceph").issues(number).get()
    label_names = set()
    for label in issue['labels']:
        label_names.add(label['name'])

    if 'documentation' in label_names:
        return 'doc: ' + title

    components = sorted(label_names.intersection(prefixes))
    if not components:
        return 'UNKNOWN: ' + title
    return ",".join(components) + ': ' + title
def _title_message(commit, pr, strict):
    """Pick the title and optional message for a merged pull request.

    The body of the merge commit (everything after its first line) is
    inspected, ignoring blank lines and lines matched by
    ``reviewed_by_re``.

    :param commit: merge commit; only its ``message`` is read
    :param pr: pull request data; only ``pr['title']`` is read
    :param strict: when true the commit message is ignored altogether
    :returns: a ``(title, message)`` tuple:

        * ``(pr title, None)`` in strict mode, when the body is empty, or
          when its first line repeats the PR title;
        * ``(body line, None)`` when the body is a single line, which is
          taken as a rewritten title;
        * ``(pr title, body)`` otherwise, with the body lines joined by
          newlines and each line prefixed with a space.
    """
    title = pr['title']
    message_lines = commit.message.split('\n')
    if strict or not message_lines:
        return (title, None)

    body = [
        text
        for text in (
            raw.strip()
            for raw in message_lines[1:]
            if not reviewed_by_re.match(raw)
        )
        if text
    ]

    if not body or body[0] == pr['title'].strip():
        return (title, None)

    if len(body) == 1:
        # assume that a single line means the intention is to
        # re-write the PR title
        return (body[0], None)

    return (title, " " + "\n ".join(body))
def _native_text(text):
    """Return *text* as the interpreter's native ``str`` type.

    On Python 2 the text is encoded to UTF-8 before being mixed into
    byte-string templates.  On Python 3 it is returned untouched, because
    encoding would produce ``bytes``, which cannot be concatenated with
    ``str`` and is rendered as ``b'...'`` by ``str.format``.
    """
    if str is bytes:  # only true on Python 2
        return text.encode("utf-8")
    return text


def _fetch_pull_request(gh, number, retries=30):
    """Fetch pull request *number* of ceph/ceph, retrying on failure.

    Any exception is treated as transient: the call is retried after a
    linearly growing pause.  The exception of the last attempt is
    re-raised, so the caller never continues without pull request data.
    """
    for attempt in range(1, retries + 1):
        try:
            return gh.repos("ceph")("ceph").pulls(number).get()
        except Exception:
            if attempt == retries:
                raise
            time.sleep(attempt * 2)


def _format_issue_refs(issue_ids, plaintext, html):
    """Render the issues of one PR as a string ending in ``", "``.

    Returns an empty string when there is no issue.  The issues are
    sorted so that the output does not depend on set iteration order.
    """
    if not issue_ids:
        return ''
    if plaintext:
        template = '#{issue}'
    elif html:
        template = (
            '<a href="http://tracker.ceph.com/issues/{issue}">'
            'issue#{issue}</a>'
        )
    else:
        template = (
            '`issue#{issue} <http://tracker.ceph.com/issues/{issue}>`_'
        )
    refs = [template.format(issue=issue) for issue in sorted(issue_ids)]
    return ", ".join(refs) + ", "


def _format_entry(pr, author, title, issues, plaintext, html):
    """Render the release-notes line of one PR in the requested format."""
    if plaintext:
        template = "* {title} ({issues}{author})"
    elif html:
        template = (
            "<li><p>{title} ({issues}<a href=\""
            "https://github.com/ceph/ceph/pull/{pr}\""
            ">pr#{pr}</a>, {author})</p></li>"
        )
    else:
        template = (
            "* {title} ({issues}`pr#{pr} <"
            "https://github.com/ceph/ceph/pull/{pr}"
            ">`_, {author})"
        )
    # the plain text template has no {pr} field; the extra argument is
    # simply ignored by str.format
    return template.format(
        title=_native_text(title),
        issues=issues,
        author=_native_text(author),
        pr=pr,
    )


def make_release_notes(gh, repo, ref, plaintext, html, verbose, strict,
                       use_tags):
    """Print release notes for the pull requests merged in *ref*.

    Every merge commit of *ref* whose summary matches ``merge_re`` is
    looked up on GitHub.  Its title, authors (``Signed-off-by`` trailers
    of the merged commits, else the author of the last parent) and the
    issues referenced by the PR body or the merged commits are collected,
    then printed sorted by title.

    :param gh: GitHub API client used to fetch pull requests and labels
    :param repo: repository object providing ``iter_commits``
    :param ref: revision (range) handed to ``repo.iter_commits``
    :param plaintext: print plain text entries, without links
    :param html: print HTML list items (ignored when *plaintext* is set);
        reStructuredText is printed when both are false
    :param verbose: passed on to ``get_original_issue``
    :param strict: ignore merge commit messages, skip PRs without an
        issue, require a "<release>: <prefix>: ..." title, resolve
        backport issues to their original and report issues fixed by
        several PRs
    :param use_tags: pass titles through ``split_component`` so that a
        missing component prefix is guessed from the GitHub labels
    """
    issue2prs = {}
    pr2issues = {}
    pr2info = {}

    # only used in strict mode, but loop invariant: build it once
    strict_title_re = (
        r'^(?:hammer|infernalis|jewel|kraken|luminous|mimic|nautilus'
        r'|octopus|pacific):\s+(' + '|'.join(prefixes) + ')(:.*)'
    )

    for commit in repo.iter_commits(ref, merges=True):
        merge = merge_re.match(commit.summary)
        if not merge:
            continue
        number = merge.group(2)
        print("Considering PR#" + number)
        # do not pick up ceph/ceph-qa-suite.git PRs
        if int(number) < 1311:
            print("Ignoring low-numbered PR, probably picked up from"
                  " ceph/ceph-qa-suite.git")
            continue

        pr = _fetch_pull_request(gh, number)
        (title, message) = _title_message(commit, pr, strict)

        issues = []
        if pr['body']:
            issues = (fixes_re.findall(pr['body']) +
                      tracker_re.findall(pr['body']))

        # a dict is used as a set of author names
        authors = {}
        merged_range = "{sha1}^1..{sha1}^2".format(sha1=commit.hexsha)
        for c in repo.iter_commits(merged_range):
            for author in re.findall(r"Signed-off-by:\s*(.*?)\s*<",
                                     c.message):
                authors[author] = 1
            issues.extend(fixes_re.findall(c.message) +
                          tracker_re.findall(c.message))
        if authors:
            author = ", ".join(authors.keys())
        else:
            author = commit.parents[-1].author.name

        if strict and not issues:
            print("ERROR: https://github.com/ceph/ceph/pull/" +
                  str(number) + " has no associated issue")
            continue

        if strict:
            match = re.match(strict_title_re, title)
            if not match:
                print("ERROR: https://github.com/ceph/ceph/pull/" +
                      str(number) + " title " + _native_text(title) +
                      " does not match " + strict_title_re)
            else:
                # drop the release name, keep "<prefix>: ..."
                title = match.group(1) + match.group(2)

        if use_tags:
            title = split_component(title, gh, number)

        # trim whitespace, punctuation and backslashes from both ends
        title = title.strip(' \t\n\r\f\v\\.,;:-=')
        # escape asterisks, which are used by reStructuredText for inline
        # emphasis
        title = title.replace('*', '\\*')
        # and escape the underscores for noting a link
        title = rst_link_re.sub(r'\1\_\2', title)
        pr2info[number] = (author, title, message)

        for issue in set(issues):
            if strict:
                issue = get_original_issue(issue, verbose)
            issue2prs.setdefault(issue, set()).add(number)
            pr2issues.setdefault(number, set()).add(issue)
        sys.stdout.write('.')

    print(" done collecting merges.")

    if strict:
        for (issue, prs) in issue2prs.items():
            if len(prs) > 1:
                print(">>>>>>> " + str(len(prs)) + " pr for issue " +
                      issue + " " + str(prs))

    for (pr, (author, title, message)) in sorted(
            pr2info.items(), key=lambda item: item[1][1]):
        issues = _format_issue_refs(pr2issues.get(pr), plaintext, html)
        print(_format_entry(pr, author, title, issues, plaintext, html))
        if message:
            print(message)
# Command line entry point: parse the options, build the GitHub client and
# the git repository object, then print the release notes.
if __name__ == "__main__":
    desc = '''
    Make ceph release notes for a given revision. Eg usage:
    $ ceph-release-notes -r tags/v0.87..origin/giant \
    $(git rev-parse --show-toplevel)
    It is recommended to set the github env. token in order to avoid
    hitting the api rate limits.
    '''

    parser = argparse.ArgumentParser(
        description=desc,
        formatter_class=argparse.RawTextHelpFormatter
    )

    parser.add_argument("--rev", "-r",
                        help="git revision range for creating release notes")
    parser.add_argument("--text", "-t",
                        action='store_true', default=None,
                        help="output plain text only, no links")
    parser.add_argument("--html",
                        action='store_true', default=None,
                        help="output html format for website blog")
    parser.add_argument("--verbose", "-v",
                        action='store_true', default=None,
                        help="verbose")
    parser.add_argument("--strict",
                        action='store_true', default=None,
                        help="strict, recommended only for backport releases")
    parser.add_argument("repo", metavar="repo",
                        help="path to ceph git repo")
    parser.add_argument(
        "--token",
        default=os.getenv("GITHUB_ACCESS_TOKEN"),
        help="Github Access Token ($GITHUB_ACCESS_TOKEN otherwise)",
    )
    # A plain on/off switch.  Without action='store_true' the option
    # required a value and any value, even "False", evaluated as true.
    parser.add_argument("--use-tags",
                        action='store_true', default=False,
                        help="Use github tags to guess the component")

    args = parser.parse_args()
    gh = github.GitHub(
        access_token=args.token)

    make_release_notes(
        gh,
        Repo(args.repo),
        args.rev,
        args.text,
        args.html,
        args.verbose,
        args.strict,
        args.use_tags
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment