-
-
Save psa-jforestier/9dc1735c43fb4af2da9324513676606d to your computer and use it in GitHub Desktop.
detect non-contributors in an organization
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# You will need github3.py: http://github3py.readthedocs.org/en/latest/#installation
# I'm not sure whether this works on Windows, due to the use of strptime().
# Based on https://gist.github.com/morido/9817399
# Improvements: take the user, password and organization from the command line, and use the new github3 API.
import argparse
import datetime
import getpass
import json
import logging
import os
import pprint
import sys  # to use sys.stdout

from github3 import login
# Python 2/3 compatibility: Python 2 reads a raw line with raw_input(), so
# alias it to input() and let the rest of the script call input() everywhere.
try:
    input = raw_input
except NameError:
    # Python 3: raw_input is gone and input() already returns the raw line.
    pass

# Turn off console output buffering so progress lines show up immediately.
# Python 3 refuses unbuffered (0) text streams, so line buffering (1) is
# requested there instead.
buf_arg = 0
if sys.version_info[0] >= 3:
    # NOTE(review): the interpreter reads PYTHONUNBUFFERED at start-up, so this
    # presumably only matters for child processes -- confirm it is still wanted.
    os.environ['PYTHONUNBUFFERED'] = '1'
    buf_arg = 1
try:
    sys.stdout = os.fdopen(sys.stdout.fileno(), 'a+', buf_arg)
    sys.stderr = os.fdopen(sys.stderr.fileno(), 'a+', buf_arg)
except (AttributeError, OSError, ValueError):
    # Best effort only: streams without a usable file descriptor (pipes,
    # redirected or replaced stdout) keep their default buffering.
    pass
# Command-line interface. Every option is optional: the user name and the
# password are asked for interactively later on when they are not given here.
parser = argparse.ArgumentParser(description='''
List all active and inactive user of a GitHub organization.''')
parser.add_argument('-u', metavar='USER', dest='github_user', action='store', required=False,
                    help='GitHub user',
                    default=None)
parser.add_argument('-p', metavar='PASSWORD', dest='github_pass', action='store', required=False,
                    help='GitHub password',
                    default=None)
parser.add_argument('-o', metavar='ORG', dest='github_org', action='store', required=False,
                    help='GitHub organization to list. Default %(default)s',
                    default='D4UDigitalPlatform')
# type=int lets argparse reject a non-numeric value with a usage message
# instead of the script failing later with a ValueError traceback.
parser.add_argument('-w', metavar='WEEKS', dest='weeks', action='store', required=False,
                    type=int,
                    help='Time of interests, in weeks. Default %(default)s',
                    default=52)
parser.add_argument('--url', metavar='URL', dest='github_url', action='store', required=False,
                    help='GitHub API url. Default %(default)s',
                    default='https://api.github.com')
args = parser.parse_args()
# Ask interactively for any credential that was not given on the command line.
if args.github_user is None:
    args.github_user = input("GitHub Username :")
if args.github_pass is None:
    # getpass reads the password without echoing it to the console.
    args.github_pass = getpass.getpass("GitHub Password :")

# Normalise the -w value to an integer number of weeks; report a bad value
# through argparse (usage message, exit status 2) rather than a traceback.
try:
    time_of_interest_in_weeks = int(args.weeks)
except ValueError:
    parser.error("-w expects a whole number of weeks, got %r" % (args.weeks,))
# Attention: There is an hourly limit of 5000 requests to the GitHub-API. This script will use a lot of these...
# NOTE(review): GitHub.com presumably no longer accepts account passwords for
# API basic authentication; a personal access token may have to be supplied as
# the password -- confirm.
# NOTE(review): the --url option is parsed but never used when logging in.
token = login(args.github_user, args.github_pass)
organization = token.organization(args.github_org)

# Start of the period of interest, as a naive datetime expressed in UTC.
# It is compared with the API's UTC ("...Z") timestamps and passed as the
# `since` filter, so it must be UTC rather than local time.
time_of_interest_in_weeks_absolute = datetime.datetime.utcnow() - datetime.timedelta(weeks=time_of_interest_in_weeks)

# Logins of every contributor found, and the subset not yet seen doing anything.
openETCSusers = []
inactiveopenETCSusers = []
def removeFromInactiveList(loginname):
    """Take ``loginname`` off the module-level inactive-user list.

    A name that is not on the list is silently ignored.
    """
    try:
        inactiveopenETCSusers.remove(loginname)
    except ValueError:
        # Not listed (or already removed earlier): nothing to do.
        pass
# Setup: Get all users in the organization
for current_repo in organization.repositories():
    print(current_repo.name + " : ")
    try:
        for current_contributor in current_repo.contributors():
            print(" - " + current_contributor.login)
            openETCSusers.append(current_contributor.login)
    except Exception as e:
        # Best effort: report the failure and carry on with the next repository.
        print(e)

# Remove duplicates; sorting keeps the final report in a stable order.
openETCSusers = sorted(set(openETCSusers))
# Everybody starts out as inactive and is struck off once activity is found.
inactiveopenETCSusers = list(openETCSusers)
print("Found " + str(len(openETCSusers)) + " unique users")
# Main part: Remove all users who 'did something'
# Layout of the timestamps parsed from the comment data below.
JSON_TIMEFORMAT = '%Y-%m-%dT%H:%M:%SZ'
for current_repo in organization.repositories():
    print("Processing Repository " + current_repo.name)

    # Step1: remove all users who made a commit within time_of_interest_in_weeks
    try:
        for current_commit in current_repo.commits(since=time_of_interest_in_weeks_absolute):
            current_commit = current_commit.as_dict()
            # usually author and committer are equal, but there are very few exceptions
            for role in ('author', 'committer'):
                account = current_commit.get(role)
                if account is not None:
                    removeFromInactiveList(account.get('login'))
        # Note: This will probably fail to recognize contributions for commits opened in the distant past (before 'since') and updated very recently
    except Exception as e:
        # Best effort: report the failure and go on with the issues.
        print(e)

    # Step2 (users who commented on a commit) is intentionally disabled:
    # it was very slow and did not work as expected.

    # Step3: remove all users who commented on something within time_of_interest_in_weeks
    nbissue = 0
    try:
        for current_issue in current_repo.issues():
            nbissue += 1
            for current_comment in current_issue.comments():
                comment_data = current_comment.as_dict()
                updated_at = datetime.datetime.strptime(comment_data["updated_at"], JSON_TIMEFORMAT)
                if updated_at > time_of_interest_in_weeks_absolute:
                    # A comment may carry no user; skip it instead of
                    # aborting the rest of the repository.
                    commenter = comment_data.get("user") or {}
                    if commenter.get("login") is not None:
                        removeFromInactiveList(commenter["login"])
        print(" - removing users from " + str(nbissue) + " issues")
    except Exception as e:
        # no idea why this happens occasionally, some access rights issue?
        print(e)

    # Progress after each repository.
    print("Inactive user remaining : " + str(len(inactiveopenETCSusers)) + " on " + str(len(openETCSusers)))
###
### OUTPUT
###
# Every print() below takes exactly one argument, so the text is the same on
# Python 2 and Python 3. The doubled spaces reproduce what the former
# trailing-comma print statements emitted.
print('\n')
print('Considered all contributions after: ' + str(time_of_interest_in_weeks_absolute))
print('\n')
print('---------------')
print('Inactive users:')
print('---------------\n')
for current_user in inactiveopenETCSusers:
    print(current_user)
print('\n')
print('Summary:')
print('--------\n')
print('# All users:  ' + str(len(openETCSusers)))
print('# Inactive users:  ' + str(len(inactiveopenETCSusers))
      + '  since ' + str(time_of_interest_in_weeks) + ' weeks')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
A new approach, written from scratch and using the GitHub API directly (no extra library needed), is available here: https://gist.github.com/psa-jforestier/28a5de5d262ac2af5b00595e651696d0