Skip to content

Instantly share code, notes, and snippets.

@psa-jforestier
Forked from morido/lazy_users.py
Last active January 7, 2019 09:53
Show Gist options
  • Save psa-jforestier/9dc1735c43fb4af2da9324513676606d to your computer and use it in GitHub Desktop.
Save psa-jforestier/9dc1735c43fb4af2da9324513676606d to your computer and use it in GitHub Desktop.
detect non-contributors in an organization
#!/usr/bin/env python
# you will need http://github3py.readthedocs.org/en/latest/#installation
# I'm not sure if this works on Windows due to the use of strptime()
# Based on https://gist.github.com/morido/9817399
# Improvements: take user, password and organization from the command line; use the new github3 API
import argparse
import datetime
import getpass
import json
import logging
import os
import pprint
import sys  # to use sys.stdout

from github3 import login
# Python 2/3 compatibility: make `input` return the typed text verbatim.
# Probing for the name directly is used instead of inspecting `__builtins__`,
# which is a module in the main script but a plain dict in imported modules.
try:
    input = raw_input
except NameError:
    # Python 3: `raw_input` is gone and the builtin `input` already returns
    # the raw string.
    pass

# Turn off console output buffering
# Python 2 accepts buffering=0 here; Python 3 rejects unbuffered text-mode
# streams, so line buffering (1) is used there instead.
buf_arg = 0
if sys.version_info[0] == 3:
    # NOTE(review): PYTHONUNBUFFERED is normally read at interpreter startup,
    # so this presumably only affects child processes -- confirm it is needed.
    os.environ['PYTHONUNBUFFERED'] = '1'
    buf_arg = 1
try:
    sys.stdout = os.fdopen(sys.stdout.fileno(), 'a+', buf_arg)
    sys.stderr = os.fdopen(sys.stderr.fileno(), 'a+', buf_arg)
except (AttributeError, EnvironmentError, ValueError):
    # Best effort only: if a stream has no usable file descriptor or cannot be
    # reopened in 'a+' mode, keep the default (buffered) streams. Unlike a
    # bare `except`, this does not swallow KeyboardInterrupt or SystemExit.
    pass
# Command-line interface. Every option is optional; the script prompts for
# missing credentials after parsing.
parser = argparse.ArgumentParser(description='''
List all active and inactive user of a GitHub organization.''')
parser.add_argument('-u', metavar='USER', dest='github_user', action='store',
                    required=False,
                    help='GitHub user',
                    default=None)
parser.add_argument('-p', metavar='PASSWORD', dest='github_pass', action='store',
                    required=False,
                    help='GitHub password',
                    default=None)
parser.add_argument('-o', metavar='ORG', dest='github_org', action='store',
                    required=False,
                    help='GitHub organization to list. Default %(default)s',
                    default='D4UDigitalPlatform')
# type=int lets argparse reject a non-numeric value with a usage error instead
# of failing later with a ValueError traceback. Code that still calls
# int(args.weeks) keeps working because int() accepts an int.
parser.add_argument('-w', metavar='WEEKS', dest='weeks', action='store',
                    required=False, type=int,
                    help='Time of interests, in weeks. Default %(default)s',
                    default=52)
# NOTE(review): github_url is parsed but not referenced anywhere in the
# visible script -- confirm whether it should be passed to the login call.
parser.add_argument('--url', metavar='URL', dest='github_url', action='store',
                    required=False,
                    help='GitHub API url. Default %(default)s',
                    default='https://api.github.com')
args = parser.parse_args()

# Prompt interactively for any credential not supplied on the command line.
if args.github_user is None:
    args.github_user = input("GitHub Username :")
if args.github_pass is None:
    # getpass reads the password without echoing it to the terminal, so the
    # old "it will be displayed in the console" warning is no longer needed.
    args.github_pass = getpass.getpass("GitHub Password :")
# Length of the activity window, in weeks (from the -w option).
time_of_interest_in_weeks = int(args.weeks)

# Attention: There is an hourly limit of 5000 requests to the GitHub-API. This script will use a lot of these...
token = login(args.github_user, args.github_pass)
organization = token.organization(args.github_org)

# Cutoff instant: activity older than this does not count. It is computed in
# UTC because it is later compared against naive datetimes parsed from the
# API's 'Z'-suffixed (UTC) timestamps; a local-time cutoff would be off by the
# machine's UTC offset.
time_of_interest_in_weeks_absolute = (
    datetime.datetime.utcnow()
    - datetime.timedelta(weeks=time_of_interest_in_weeks)
)

# All contributor logins found in the organization, and the subset for which
# no recent activity has been seen yet.
openETCSusers = []
inactiveopenETCSusers = []
def removeFromInactiveList(loginname):
    """Drop *loginname* from the module-level inactive-user list.

    Logins that are not currently in ``inactiveopenETCSusers`` are ignored.
    """
    try:
        inactiveopenETCSusers.remove(loginname)
    except ValueError:
        # The login was not in the list; nothing to remove.
        pass
# Setup: Get all users in the organization
# Single-argument print(...) calls are used so the same code runs on both
# Python 2 and Python 3 with identical output.
for current_repo in organization.repositories():
    print(current_repo.name + " : ")
    try:
        for current_contributor in current_repo.contributors():
            print(" - " + current_contributor.login)
            openETCSusers.append(current_contributor.login)
    except Exception as e:
        # Best effort: report the problem and carry on with the next repository.
        print(e)

openETCSusers = list(set(openETCSusers))  # remove duplicates
inactiveopenETCSusers = list(openETCSusers)  # initialize the inactive users
print("Found " + str(len(openETCSusers)) + " unique users")
# Main part: Remove all users who 'did something'
# Layout of the 'Z'-suffixed timestamps parsed below; defined once instead of
# on every comment.
JSON_TIMEFORMAT = '%Y-%m-%dT%H:%M:%SZ'

for current_repo in organization.repositories():
    print("Processing Repository " + current_repo.name)

    # Step1: remove all users who made a commit within time_of_interest_in_weeks
    try:
        for current_commit in current_repo.commits(since=time_of_interest_in_weeks_absolute):
            current_commit = current_commit.as_dict()
            # usually author and committer are equal, but there are very few exceptions
            # (the key is spelled 'committer'; the former 'commiter' lookup
            # never matched, so committers were silently skipped)
            for role in ('author', 'committer'):
                person = current_commit.get(role)
                if person is not None:
                    removeFromInactiveList(person.get('login'))
        # Note: This will probably fail to recognize contributions for commits opened in the distant past (before 'since') and updated very recently
    except Exception as e:
        # Best effort: report the problem and continue with the issue scan.
        print(e)

    # Step2 (remove all users who commented on a commit) was disabled by the
    # author: it was very slow and did not work as expected.

    # Step3: remove all users who commented on something within time_of_interest_in_weeks
    nbissue = 0
    try:
        for current_issue in current_repo.issues():
            nbissue = nbissue + 1
            for current_comment in current_issue.comments():
                # as_dict() is the same accessor already used for commits above.
                comment_data = current_comment.as_dict()
                updated_at = datetime.datetime.strptime(comment_data["updated_at"], JSON_TIMEFORMAT)
                if updated_at > time_of_interest_in_weeks_absolute:
                    # Tolerate a missing/null user so one such comment does not
                    # abort the scan of the whole repository.
                    comment_user = comment_data.get("user") or {}
                    removeFromInactiveList(comment_user.get("login"))
        # str() is required: concatenating the int directly raised a TypeError
        # that the handler below then swallowed.
        print(" - removing users from " + str(nbissue) + " issues")
    except Exception as e:
        # no idea why this happens occasionally, some access rights issue?
        print(e)

    # Progress report after each repository.
    print("Inactive user remaining : " + str(len(inactiveopenETCSusers)) + " on " + str(len(openETCSusers)))
###
### OUTPUT
###
# Final report. Each line is emitted by a single-argument print(...) call so
# the code runs on both Python 2 and Python 3. The doubled spaces reproduce
# what the former trailing-comma print statements produced.
print('\n')
print('Considered all contributions after: ' + str(time_of_interest_in_weeks_absolute))
print('\n')
print('---------------')
print('Inactive users:')
print('---------------\n')
for current_user in inactiveopenETCSusers:
    print(current_user)
print('\n')
print('Summary:')
print('--------\n')
print('# All users:  ' + str(len(openETCSusers)))
print('# Inactive users:  ' + str(len(inactiveopenETCSusers))
      + '  since ' + str(time_of_interest_in_weeks) + ' weeks')
@psa-jforestier
Copy link
Author

A new approach, written from scratch and using the GitHub API directly (no extra library needed), is available here: https://gist.github.com/psa-jforestier/28a5de5d262ac2af5b00595e651696d0

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment