Last active
July 26, 2019 11:41
-
-
Save embray/4497178 to your computer and use it in GitHub Desktop.
List pull requests merged since the last tag on the given release branch which have not yet been merged into the release branch.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# I wasn't happy with any of the GitHub libraries for Python that I tried so I | |
# just used the GitHub API directly. If someone would like to rewrite this | |
# using a library please be my guest | |
from __future__ import unicode_literals | |
import argparse | |
import base64 | |
import getpass | |
import io | |
import json | |
import logging | |
import os | |
import re | |
import stat | |
import sys | |
try: | |
from urllib.request import Request, urlopen | |
from urllib.error import HTTPError | |
from urllib.parse import urlencode | |
except ImportError: | |
from urllib2 import Request, urlopen, HTTPError | |
from urllib import urlencode | |
try: | |
input = raw_input | |
except NameError: | |
pass | |
# Because pkg_resources provides better version parsing than distutils | |
import pkg_resources | |
# Root URL of GitHub's REST API; owner/repo/resource paths are appended to it.
BASE_URL = 'https://api.github.com/repos/'
# This regex ensures that only the 'Conflicts:' section at the end of the
# commit message is matched (in case there are multiple 'Conflicts' sections
# which can happen).
CONFLICTS_RE = re.compile(r'((?:.|\n)+)\nConflicts:(\n.*)+', flags=re.M)
# Root logger for the script; handlers/levels are configured in main().
log = logging.getLogger()
class _MaxLevelFilter(logging.Filter): | |
def __init__(self, maxlevel): | |
self.maxlevel = maxlevel | |
def filter(self, record): | |
return record.levelno <= self.maxlevel | |
class GithubRequestError(Exception):
    """Raised when the GitHub API responds with an error message."""
class GithubSuggestBackports(object):
    """Suggest pull requests merged into the main development branch that
    have not yet been cherry-picked onto the given bug fix (release)
    branch, talking to the GitHub REST API directly.
    """

    def __init__(self, owner, repo, branch, username=None, password=None):
        self.owner = owner
        self.repo = repo
        self.branch = branch
        # Cache all the commits found for the given branch so we don't have
        # to re-request them for each pull request.  Kept per-instance: a
        # class-level list would be mutated in place and shared between
        # instances.
        self._cached_commits = []
        # Memoized results of get_last_tag()/get_last_tag_commit();
        # `None` means "not determined yet", `False` means "none exists"
        self._last_tag = None
        self._last_tag_commit = None
        if username is not None and password is not None:
            # We can't rely on urllib2 to handle basic authentication in the
            # normal way since GitHub requests don't always have
            # www-authenticate in the headers
            auth = ':'.join((username, password)).encode('ascii')
            self._auth = base64.b64encode(auth).decode('ascii')
        else:
            self._auth = None

    @staticmethod
    def _response_charset(headers):
        """Return the charset of an HTTP response portably.

        Python 2's ``HTTPMessage`` has no ``get_content_charset`` method
        (it crashes with AttributeError), so fall back to parsing the
        Content-Type header by hand (e.g. "application/json;
        charset=utf-8").  Defaults to UTF-8, the GitHub API's encoding,
        when no charset is declared.
        """
        get_charset = getattr(headers, 'get_content_charset', None)
        if get_charset is not None:
            enc = get_charset()
        else:
            content_type = headers.get('Content-Type', '')
            enc = content_type.partition('charset=')[2]
        return enc or 'utf8'

    def _github_repo_request(self, *resource, **parameters):
        """GET a resource under this repository and return the decoded
        JSON response.

        Raises `GithubRequestError` when the API reports an error message;
        re-raises the underlying `HTTPError` otherwise.
        """
        resource = tuple(str(r) for r in resource)
        url = BASE_URL + '/'.join((self.owner, self.repo) + resource)
        if parameters:
            url += '?' + urlencode(parameters)
        log.debug('Requesting ' + url)
        req = Request(url)
        if self._auth:
            req.add_header('Authorization', 'Basic ' + self._auth)
        try:
            f = urlopen(req)
            content = f.read().decode(self._response_charset(f.headers))
            response = json.loads(content)
        except HTTPError as e:
            response = json.loads(e.fp.read().decode('utf8'))
            if 'message' in response:
                raise GithubRequestError(response['message'])
            # Bare raise preserves the original traceback
            raise
        return response

    def get_tags(self):
        """Return the repository's tags (first page of the listing)."""
        return self._github_repo_request('tags')

    def get_milestones(self, state=None):
        """Return milestones, optionally filtered by state ('open'/'closed')."""
        parameters = {}
        if state is not None:
            parameters['state'] = state
        return self._github_repo_request('milestones', **parameters)

    def iter_issues(self, milestone=None, state=None):
        """Iterate over issues, optionally filtered by milestone number
        and/or state, transparently following the API's pagination.
        """
        parameters = {}
        if milestone is not None:
            parameters['milestone'] = milestone
        if state is not None:
            parameters['state'] = state
        parameters['page'] = 1
        issues = []
        while True:
            if not issues:
                response = self._github_repo_request('issues', **parameters)
                if not response:
                    # Out of pages; use `return`, not `raise StopIteration`,
                    # which PEP 479 turns into a RuntimeError on Python 3.7+
                    return
                issues.extend(response)
                parameters['page'] += 1
            yield issues.pop(0)

    def iter_issue_events(self, issue, filter_=None, count=None):
        """Iterate over events on the given issue, optionally restricted
        to one event type (`filter_`) and/or a maximum number of yielded
        events (`count`).
        """
        events = []
        page = 1
        while count is None or count:
            # Events can be paginated
            if not events:
                next_events = self._github_repo_request('issues', issue,
                                                        'events', page=page)
                if not next_events:
                    # Out of pages; end the generator (PEP 479)
                    return
                if filter_ is not None:
                    next_events = [e for e in next_events
                                   if e['event'] == filter_]
                events.extend(next_events)
                page += 1
                # Either continue to the next page of events or start popping
                # any found events off the first page
                continue
            yield events.pop(0)
            # `None - 1` raises TypeError, so only count down when a limit
            # was actually given
            if count is not None:
                count -= 1

    def get_pull_request_merge_commit(self, pr):
        """Returns the full commit object of the merge commit for a pull
        request or `None` if the given PR has not been merged.

        This is different from the commit named by merge_commit_sha listed in a
        pull request in that it's the commit that actually goes into mainline
        branch. The commit listed in merge_commit_sha only seems to be an
        artifact of how GitHub implements pull requests.
        """
        events = list(self.iter_issue_events(pr, filter_='merged', count=1))
        if events:
            return self.get_commit(events[0]['commit_id'])

    def get_commits(self, sha):
        """Get the first page of commits in the tree starting at sha.

        Commits are returned 30 at a time and paginated according to sha. So in
        order to get the second page of commits it's necessary to use a
        subsequent call to get_commits using the sha of the last commit from
        the previous call (which will be the first commit listed in the second
        call).
        """
        return self._github_repo_request('commits', sha=sha)

    def get_commit(self, sha):
        """Return a single commit."""
        return self._github_repo_request('commits', sha)

    def iter_pull_requests(self, state=None):
        """Iterate over pull requests, optionally filtered by state,
        transparently following the API's pagination.
        """
        parameters = {}
        if state is not None:
            parameters['state'] = state
        parameters['page'] = 1
        prs = []
        while True:
            if not prs:
                response = self._github_repo_request('pulls', **parameters)
                if not response:
                    # Out of pages; end the generator (PEP 479)
                    return
                prs.extend(response)
                parameters['page'] += 1
            yield prs.pop(0)

    def get_pull_request(self, number):
        """Return the pull request with the given number, or `None` if no
        such PR exists.
        """
        try:
            pr = self._github_repo_request('pulls', str(number))
        except GithubRequestError as e:
            # Exception.message does not exist on Python 3; compare the
            # stringified exception instead
            if str(e) == 'Not Found':
                return None
            raise
        return pr

    def find_merged_commit(self, commit, since=None):
        """
        Determines whether or not this commit was already merged into the
        release branch, and if so returns the merge commit from the branch.
        Returns `None` if the commit was not found to be merged.
        """
        def expand_cache():
            # Grow self._cached_commits by one more page of branch commits;
            # returns False when there is nothing more worth fetching
            if not self._cached_commits:
                # Initialize with the first page of commits from the bug fix
                # branch
                next_commits = self.get_commits(self.branch)
            else:
                last_commit = self._cached_commits[-1]
                if last_commit['commit']['committer']['date'] <= since:
                    # Already paged back past the cutoff date
                    return False
                # Skip the first commit: pagination by sha repeats it
                next_commits = self.get_commits(last_commit['sha'])[1:]
            if next_commits:
                self._cached_commits.extend(next_commits)
                return True
            else:
                return False

        idx = 0
        while True:
            try:
                merged_commit = self._cached_commits[idx]
            except IndexError:
                # Try growing the list of commits; but if there are no more
                # to be found return None
                if expand_cache():
                    continue
                return None
            # For cherry-picks we can't rely on comparing the sha, but the
            # author and commit message should be close enough
            a = commit['commit']
            b = merged_commit['commit']
            # Remove conflicts from the cherry-picked commit's commit message;
            # conflicts can cause the message to be different where it
            # otherwise wouldn't have been, and we don't care if there were
            # conflicts so long as it was merged successfully
            b['message'] = CONFLICTS_RE.sub(r'\1', b['message'])
            if a['author'] == b['author'] and a['message'] == b['message']:
                return merged_commit
            idx += 1

    def get_next_milestone(self):
        """Get the next open milestone that has the same version prefix as the
        branch. For example if the repo has milestones v0.2.1 and v0.2.2 and the
        branch is v0.2.x, this will return v0.2.1.
        """
        # 'v0.2.x' -> 'v0.2.'
        prefix = self.branch[:-1]
        milestones = [m for m in self.get_milestones(state='open')
                      if m['title'].startswith(prefix)]
        # Sort by the final numeric version component and take the smallest
        sort_key = lambda m: int(m['title'].rsplit('.', 1)[1])
        return sorted(milestones, key=sort_key)[0]

    def get_last_tag(self):
        """Return the most recent tag belonging to this release branch, or
        `False` if the branch has never been tagged.
        """
        if self._last_tag is not None:
            return self._last_tag
        # The branch is named like 'v0.2.x'; tags belonging to it are named
        # like 'v0.2.0', 'v0.2.1', ...
        branch_base = self.branch.lstrip('v')
        if branch_base.endswith('.x'):
            branch_base = branch_base[:-2]
        tags = sorted(self.get_tags(),
                      key=lambda t: pkg_resources.parse_version(t['name']),
                      reverse=True)
        # Get the last tag that should be in this branch.  Comparing the
        # version strings as prefixes avoids slicing/iterating the result of
        # parse_version(), which modern setuptools no longer supports (it
        # raised "TypeError: 'bool' object is not subscriptable" downstream)
        for tag in tags:
            tag_base = tag['name'].lstrip('v')
            if (tag_base == branch_base or
                    tag_base.startswith(branch_base + '.')):
                self._last_tag = tag
                return tag
        # Cache and return a consistent falsy value (the original implicitly
        # returned None on the first call but False on later cached calls)
        self._last_tag = False
        return False

    def get_last_tag_commit(self):
        """Return the commit object the branch's last tag points at, or
        `False` if the branch has never been tagged.
        """
        if self._last_tag_commit is not None:
            return self._last_tag_commit
        last_tag = self.get_last_tag()
        if last_tag:
            last_tag_commit = self.get_commit(last_tag['commit']['sha'])
        else:
            last_tag_commit = False
        self._last_tag_commit = last_tag_commit
        return last_tag_commit

    def iter_suggested_prs(self):
        """Yield ``(pull_request, merge_commit_sha)`` pairs for closed PRs in
        the next milestone that were merged after the branch's last tag but
        have not yet been cherry-picked onto the branch.
        """
        next_milestone = self.get_next_milestone()
        next_ms_num = next_milestone['number']
        log.info("Finding PRs in milestone {0} that haven't been merged into "
                 "{1}".format(next_milestone['title'], self.branch))
        log.info('Merge these into {0} by doing "git checkout {0}; git pull; '
                 'git cherry-pick -m 1 <SHA>"'.format(self.branch))
        last_tag_commit = self.get_last_tag_commit()
        if not last_tag_commit:
            # There have *been* no tags of this release line so just quit
            # (PEP 479: `return`, not `raise StopIteration`)
            return
        last_tag_date = last_tag_commit['commit']['committer']['date']
        # Get the issue #s of all closed issues in the relevant milestone
        milestone_issues = set(issue['number'] for issue in
                               self.iter_issues(milestone=next_ms_num,
                                                state='closed'))
        # Now get all PRs and filter by whether or not they belong to the
        # milestone; requesting them all at once is still faster than
        # requesting one at a time. This would also be easier if the API
        # supported sorting on PR lists
        for pr in self.iter_pull_requests(state='closed'):
            if (pr['number'] not in milestone_issues or not pr['merged_at']):
                continue
            merge_commit = self.get_pull_request_merge_commit(pr['number'])
            if merge_commit is None:
                # No 'merged' event was found for this PR; nothing to compare
                continue
            # Ignore commits that were merged before the last tag date
            if merge_commit['commit']['committer']['date'] < last_tag_date:
                continue
            if not self.find_merged_commit(merge_commit,
                                           since=last_tag_date):
                yield pr, merge_commit['sha']
def main(argv):
    """Command-line entry point.

    Parses arguments, routes INFO-and-below log output to stdout and
    warnings/errors to stderr, prompts for GitHub credentials, then prints
    (or saves to an executable file) a cherry-pick shell script for the
    suggested backports.
    """
    parser = argparse.ArgumentParser(
        description='Find pull requests that need be backported to a bug fix '
                    'branch')
    parser.add_argument('owner', metavar='OWNER',
                        help='owner of the repository')
    parser.add_argument('repo', metavar='REPO', help='the repository name')
    parser.add_argument('branch', metavar='BRANCH',
                        help='the name of the bug fix branch (eg. v0.2.x)')
    parser.add_argument('-f', '--file', metavar='FILE',
                        help='save the cherry-pick script to a file; '
                             'otherwise it is written to stdout')
    parser.add_argument('--debug', action='store_true')
    args = parser.parse_args(argv)

    # Configure log
    log.setLevel(logging.DEBUG)
    out_handler = logging.StreamHandler(sys.stdout)
    if args.debug:
        out_handler.setLevel(logging.DEBUG)
    else:
        out_handler.setLevel(logging.INFO)
        # Keep WARNING and above off stdout; they go to stderr below
        out_handler.addFilter(_MaxLevelFilter(logging.INFO))
    log.addHandler(out_handler)
    err_handler = logging.StreamHandler(sys.stderr)
    err_handler.setLevel(logging.WARNING)
    log.addHandler(err_handler)

    log.info("Enter your GitHub username and password so that API requests "
             "aren't as severely rate-limited...")
    user = input('Username: ')
    pw = getpass.getpass('Password: ')
    suggester = GithubSuggestBackports(args.owner, args.repo, args.branch,
                                       user, pw)

    pr_format = '[#{0}][{1}]: {2}'
    suggestions = []
    for pr, sha in suggester.iter_suggested_prs():
        # If sys.stdout's default encoding has a limited codepage this blows up if
        # the PR title contains unencodable characters =_=
        safe_title = pr['title'].encode('ascii',
                                        errors='replace').decode('ascii')
        log.info(pr_format.format(pr['number'], sha, safe_title))
        suggestions.append((pr, sha))

    # Emit the cherry-picks in merge order
    suggestions.sort(key=lambda item: item[0]['merged_at'])
    script_lines = [
        '#!/bin/bash',
        '# git commands:',
        'git checkout {0} || exit 1'.format(args.branch),
        'git pull upstream {0} || exit 1'.format(args.branch)
    ]
    for pr, sha in suggestions:
        script_lines.append('# ' + pr_format.format(pr['number'], sha,
                                                    pr['title']))
        script_lines.append('git cherry-pick -m 1 {0} || exit 1'.format(sha))

    if args.file:
        # Write the script and make it executable by the owner
        with io.open(args.file, 'w', encoding='utf8') as f:
            f.writelines(line + '\n' for line in script_lines)
        os.chmod(args.file, stat.S_IRWXU)
    else:
        for line in script_lines:
            log.info(line)
# Script entry point: run main() on the CLI arguments and exit with its
# return value as the process status.
if __name__ == '__main__':
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
For some reason I can't get this to work in either Python 2 or 3:
$ python3.4 suggest_backports.py astropy astropy v1.0.x
Enter your GitHub username and password so that API requests aren't as severely rate-limited...
Username:
Password:
Finding PRs in milestone v1.0.0 that haven't been merged into v1.0.x
Merge these into v1.0.x by doing "git checkout v1.0.x; git pull; git cherry-pick -m 1 <SHA>"
/Users/tom/Library/Python/3.4/lib/python/site-packages/pkg_resources/__init__.py:188: RuntimeWarning: You have iterated over the result of pkg_resources.parse_version. This is a legacy behavior which is inconsistent with the new version class introduced in setuptools 8.0. That class should be used directly instead of attempting to iterate over the result.
stacklevel=1,
Traceback (most recent call last):
File "suggest_backports.py", line 393, in <module>
sys.exit(main(sys.argv[1:]))
File "suggest_backports.py", line 362, in main
for pr, sha in suggester.iter_suggested_prs():
File "suggest_backports.py", line 301, in iter_suggested_prs
last_tag_date = last_tag_commit['commit']['committer']['date']
TypeError: 'bool' object is not subscriptable
mac-robitaille2:astropy tom$ python2.7 suggest_backports.py astropy astropy v1.0.x
Enter your GitHub username and password so that API requests aren't as severely rate-limited...
Username:
Password:
Traceback (most recent call last):
File "suggest_backports.py", line 393, in <module>
sys.exit(main(sys.argv[1:]))
File "suggest_backports.py", line 362, in main
for pr, sha in suggester.iter_suggested_prs():
File "suggest_backports.py", line 294, in iter_suggested_prs
next_milestone = self.get_next_milestone()
File "suggest_backports.py", line 257, in get_next_milestone
milestones = [m for m in self.get_milestones(state='open')
File "suggest_backports.py", line 107, in get_milestones
return self._github_repo_request('milestones', **parameters)
File "suggest_backports.py", line 89, in _github_repo_request
enc = f.headers.get_content_charset()
AttributeError: HTTPMessage instance has no attribute 'get_content_charset'
I haven't had time to look into the failures yet.
AttributeError: HTTPMessage instance has no attribute 'get_content_charset'
@astrofrog how did you fix this?
@BlackRider97 I had to change line 89 to
enc = f.headers['Content-Type'].split('charset=')[1] # Content-Type will look like: application/json; charset=utf-8
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Okay, I merged your changes. For a future version I might have it generate a little shell script to run to do all the necessary commands.