Created
December 2, 2016 17:47
-
-
Save neilernst/3201822719f610f7732194aba9d108b3 to your computer and use it in GitHub Desktop.
Python 3 script that takes a directory of GitHub issues and finds the files changed by the commits related to each issue
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Retrieve, for a list of issues, the associated commits and the files they changed.
# First run Octohub's offline issues plugin: https://github.com/turnkeylinux/octohub/tree/master/contrib/offline-issues
# Assumes the relevant issue data is stored in the octohub "all" dir.
# Author: @neilernst
# Copyright: SEI
# Licence: BSD
import logging
import os
import sys

import simplejson as json
from octohub.connection import Connection
from octohub.exceptions import ResponseError

# For every issue file in `issues_dir`, ask the GitHub API (via octohub) for the
# issue's events, collect the commits those events reference, look up the files
# each commit touched, and write {commit_sha: [filename, ...]} to
# `out_dir/<issue file name>.json`.

logging.basicConfig(level=logging.INFO)  # raise to WARNING to hide per-issue progress

repo_fqn = '/repos/eclipse/che'  # change this to the project you care about
OCTOHUB_TOKEN = os.environ.get('OCTOHUB_TOKEN')  # set this in environment prior to script
issues_dir = 'che/all'  # read issue json from here
out_dir = 'che/out'  # store to issue_id.json files here

# The connection does not depend on the issue, so build it once for the run.
if OCTOHUB_TOKEN:
    conn = Connection(OCTOHUB_TOKEN)  # 5000 queries/h
else:
    conn = Connection()  # 60 queries/h

# Create the output directory up front so the first write cannot fail on it.
os.makedirs(out_dir, exist_ok=True)

for jfile in os.listdir(path=issues_dir):
    logging.info('Reading issue # ' + jfile)

    # The stored issue is parsed only as a sanity check that the entry is a
    # readable JSON file; the API path below is rebuilt from the file name,
    # which is used as the issue number.
    with open(os.path.join(issues_dir, jfile), 'r') as issue_json:
        json.load(issue_json)

    events_url = repo_fqn + '/issues/' + jfile + '/events'
    try:
        response = conn.send('GET', events_url)
    except ResponseError as err:
        logging.error("Response error: {0}".format(err))
        logging.error("Error occurred while trying " + events_url)
        sys.exit(1)  # non-zero status so callers can tell the run failed

    # Each issue has events, some of which have commits associated.
    # NOTE(review): this reads whatever a single conn.send call returns; if the
    # API pages the events list, later pages are not fetched -- confirm.
    commit_shas = [event.commit_id for event in response.parsed if event.commit_id]

    # Each event with a commit has a SHA id; each commit has a list of files.
    commit_files_dict = {}
    for commit_sha in commit_shas:
        if commit_sha in commit_files_dict:
            # Several events can point at the same commit; one lookup is enough.
            continue
        logging.info("looking for commit " + commit_sha)
        uri = repo_fqn + '/commits/' + commit_sha
        try:
            response = conn.send('GET', uri)
        except ResponseError as err:
            logging.error("Error occurred while trying " + uri)
            # Assumes the error payload is a mapping with a 'message' key, as
            # the original script did -- TODO confirm against octohub.
            if err.args[0]['message'] == "Not Found":
                # The commit is gone (e.g. rebased away). Skip just this one
                # instead of abandoning the issue's remaining commits.
                logging.error("URL " + uri + " returned 404 Not Found.")
                continue
            logging.error("Response error: {0}".format(err))
            sys.exit(1)
        commit_files_dict[commit_sha] = [
            git_file['filename'] for git_file in response.parsed['files']
        ]

    if commit_files_dict:
        # 'w', not 'a': appending on a re-run would put two JSON documents in
        # one file and make it unparseable.
        with open(os.path.join(out_dir, jfile + '.json'), 'w') as out_json:
            json.dump(commit_files_dict, out_json)
    else:
        logging.info("Skipping issue #" + jfile + ", no commits")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment