@eteq · Created July 5, 2014
A script that queries GitHub to count the issues and PRs a project has opened and closed/merged since a release
"""
This script uses PyPI to identify a release of a package, and then searches
through GitHub to count the issues and PRs opened and closed/merged since
that release.

Usage:

    python gh_issuereport.py astropy/astropy astropy/0.3

Note that it will prompt you for your GitHub username/password. This isn't
necessary if you have fewer than 6000 combined issues/PRs, but beyond that
(or if you run the script multiple times in an hour without caching) you may
hit GitHub's limit of 60 API calls per hour (which rises to 5000 once you
log in).

Also note that by default this caches the PRs/issues in "prs.json" and
"issues.json". Give it the "-n" option to skip that.

Requires the requests package (https://pypi.python.org/pypi/requests/).
"""
from __future__ import print_function
import os
import json
import datetime

import requests

try:  # Python 2/3 compatibility: raw_input was renamed to input in Python 3
    raw_input
except NameError:
    raw_input = input

GH_API_BASE_URL = 'https://api.github.com'
ISO_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
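
# The quota discussed in the docstring can be checked without spending any of
# it: GitHub's documented /rate_limit endpoint is exempt from the limit. A
# minimal sketch (not called anywhere below; ``auth`` is the same optional
# (username, password) tuple the functions in this script use):
def print_rate_limit(auth=None):
    resp = requests.get(GH_API_BASE_URL + '/rate_limit', auth=auth)
    core = resp.json()['resources']['core']
    print(core['remaining'], 'of', core['limit'], 'API calls remaining this hour')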
def paginate_list_request(req, verbose=False, auth=None):
    """Follow the 'next' links of a paginated GitHub response, returning the
    concatenated list of results from all pages."""
    elems = []
    currreq = req
    i = 1
    while 'next' in currreq.links:
        elems.extend(currreq.json())
        i += 1
        if verbose:
            print('Doing request', i, 'of', currreq.links['last']['url'].split('page=')[-1])
        currreq = requests.get(currreq.links['next']['url'], auth=auth)
    elems.extend(currreq.json())  # the last (or only) page has no 'next' link
    return elems
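
# For reference, requests parses the RFC 5988 ``Link`` response header into
# the ``.links`` dict used above. On the first page of a paginated GitHub
# listing it looks roughly like this (URL abbreviated, numbers illustrative):
#
#   >>> r = requests.get(GH_API_BASE_URL + '/repos/astropy/astropy/issues?per_page=100&state=all')
#   >>> sorted(r.links)
#   ['last', 'next']
#   >>> r.links['next']['url']
#   'https://api.github.com/...&page=2'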
def count_issues_since(dt, repo, auth=None, verbose=True, cacheto=None):
    if cacheto and os.path.exists(cacheto):
        with open(cacheto) as f:
            isslst = json.load(f)
    else:
        # note that GitHub's issues listing also includes pull requests,
        # so these counts cover PRs as well
        url = GH_API_BASE_URL + '/repos/' + repo + '/issues?per_page=100&state=all'
        req = requests.get(url, auth=auth)
        isslst = paginate_list_request(req, verbose, auth=auth)
        if cacheto:
            with open(cacheto, 'w') as f:
                json.dump(isslst, f)

    nopened = nclosed = 0
    for entry in isslst:
        createddt = datetime.datetime.strptime(entry['created_at'], ISO_FORMAT)
        if createddt > dt:
            nopened += 1
        if entry['closed_at']:
            closeddt = datetime.datetime.strptime(entry['closed_at'], ISO_FORMAT)
            if closeddt > dt:
                nclosed += 1
    return {'opened': nopened, 'closed': nclosed}
def count_prs_since(dt, repo, auth=None, verbose=True, cacheto=None):
    if cacheto and os.path.exists(cacheto):
        with open(cacheto) as f:
            prlst = json.load(f)
    else:
        url = GH_API_BASE_URL + '/repos/' + repo + '/pulls?per_page=100&state=all'
        req = requests.get(url, auth=auth)
        prlst = paginate_list_request(req, verbose, auth=auth)
        if cacheto:
            with open(cacheto, 'w') as f:
                json.dump(prlst, f)

    nopened = nclosed = 0
    usersopened = []
    usersclosed = []
    for entry in prlst:
        createddt = datetime.datetime.strptime(entry['created_at'], ISO_FORMAT)
        if createddt > dt:
            nopened += 1
            usersopened.append(entry['user']['id'])
        if entry['merged_at']:
            closeddt = datetime.datetime.strptime(entry['merged_at'], ISO_FORMAT)
            if closeddt > dt:
                nclosed += 1
                usersclosed.append(entry['user']['id'])
    return {'opened': nopened, 'merged': nclosed,
            'usersopened': len(set(usersopened)),
            'usersmerged': len(set(usersclosed))}
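
# Each ``entry`` above is one pull request as returned by the GitHub v3 API;
# the handful of fields this function reads look like this (values are
# illustrative, most other fields are omitted, and ``merged_at`` is null for
# unmerged PRs):
#
#   {"created_at": "2014-07-05T17:00:00Z",
#    "merged_at": "2014-07-06T09:30:00Z",
#    "user": {"id": 12345, "login": "..."}}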
def get_datetime_of_pypi_version(pkg, version):
    from xml.dom import minidom

    # scrape the release date out of the first table on the package's
    # pypi.python.org page
    url = 'http://pypi.python.org/pypi/{0}/{1}'.format(pkg, version)
    dom = minidom.parseString(requests.get(url).content)
    table = dom.getElementsByTagName('table')[0]
    t = table.getElementsByTagName('tr')[1].getElementsByTagName('td')[-2].firstChild
    if t.nodeType != t.TEXT_NODE:
        raise ValueError("pypi page for {0}/{1} doesn't seem to have a date".format(pkg, version))
    else:
        datestr = t.data
    return datetime.datetime.strptime(datestr, "%Y-%m-%d")
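
# The scraper above targets the old pypi.python.org page layout, which no
# longer exists. A sketch of the same lookup against the current PyPI JSON
# API (the function name is new here; field names are from today's schema,
# where 'urls' lists the files uploaded for that release):
def get_datetime_of_pypi_version_json(pkg, version):
    url = 'https://pypi.org/pypi/{0}/{1}/json'.format(pkg, version)
    uploaded = requests.get(url).json()['urls'][0]['upload_time']
    # upload_time looks like '2014-07-05T17:00:00'
    return datetime.datetime.strptime(uploaded, '%Y-%m-%dT%H:%M:%S')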
if __name__ == '__main__':
    import argparse
    from getpass import getpass

    parser = argparse.ArgumentParser(description='Count GitHub issues/PRs '
                                     'opened and closed since a PyPI release.')
    parser.add_argument('repo', help='the github repo to use')
    parser.add_argument('package', help='the package/version to lookup on pypi '
                                        'or "None" to skip the lookup')
    parser.add_argument('-q', '--quiet', help='hide informational messages',
                        dest='verbose', action='store_false')
    parser.add_argument('-n', '--no-cache', help="don't cache the downloaded "
                                                 "issue/PR info (and don't read "
                                                 "the cached versions)",
                        dest='cache', action='store_false')
    args = parser.parse_args()

    if args.package.lower() == 'none':
        # probably nothing on github was created before the year 1900...
        pkgdt = datetime.datetime(1900, 1, 1)
        if args.verbose:
            print('Not looking up a PyPI package')
    else:
        pkgdt = get_datetime_of_pypi_version(*args.package.split('/'))
        if args.verbose:
            print('Found PyPI entry for', args.package, ':', pkgdt)

    un = raw_input('Github username (blank for no auth): ')
    if un:
        auth = (un, getpass())
    else:
        auth = None

    if args.cache:
        icache = 'issues.json'
        prcache = 'prs.json'
    else:
        icache = prcache = None

    icnt = count_issues_since(pkgdt, args.repo, auth=auth, verbose=args.verbose, cacheto=icache)
    prcnt = count_prs_since(pkgdt, args.repo, auth=auth, verbose=args.verbose, cacheto=prcache)

    print(icnt['opened'], 'issues opened since', args.package, 'and', icnt['closed'], 'issues closed')
    print(prcnt['opened'], 'PRs opened since', args.package, 'and', prcnt['merged'], 'PRs merged')
    print(prcnt['usersopened'], 'unique users opened PRs, and', prcnt['usersmerged'], 'of them had a PR merged')
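
# The counting helpers can also be used from an interactive session; a
# sketch, assuming the file is saved as gh_issuereport.py (the repo name and
# date are just examples, and the returned numbers are elided):
#
#   >>> import datetime, gh_issuereport
#   >>> since = datetime.datetime(2014, 1, 1)
#   >>> gh_issuereport.count_prs_since(since, 'astropy/astropy')
#   {'opened': ..., 'merged': ..., 'usersopened': ..., 'usersmerged': ...}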