Created
February 26, 2019 03:32
-
-
Save ryanwilsonperkin/3fbe622faf9b45ce6ea5356c2dd4e35c to your computer and use it in GitHub Desktop.
Count all the lines I've ever changed on GitHub
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python3
"""
Count all the lines I've ever changed on GitHub

Setup:
    pip install requests tqdm GitPython
    export GITHUB_TOKEN=<your GitHub API token>

Usage:
    ./commits.py
"""
import fnmatch | |
import functools | |
import git | |
import json | |
import os | |
import requests | |
import subprocess | |
import tqdm | |
# Root URL that every API path passed to github_fetch() is appended to.
GITHUB_BASE_URL = 'https://api.github.com'

# GitHub login used as the ``author`` filter when listing commits.
AUTHOR = 'ryanwilsonperkin'

# fnmatch-style patterns matched against a repo's ``full_name``; matching
# repos are skipped by filter_repos().
IGNORED_REPOS = (
    'thalmic-alpha/*',
)
@functools.lru_cache()
def get_token():
    """Return the GitHub token stored in the GITHUB_TOKEN environment variable.

    The result is memoized, so the environment is only consulted on the
    first call. Returns ``None`` when the variable is not set.
    """
    return os.getenv('GITHUB_TOKEN')
def mkdir(d):
    """Make directory ``d`` if it doesn't exist.

    Missing parent directories are created as well, and an already
    existing directory is left untouched.

    Args:
        d: Path of the directory to create.

    Raises:
        FileExistsError: If ``d`` exists but is not a directory.
    """
    # A single makedirs(exist_ok=True) call avoids the race between a
    # separate existence check and the create, and also handles parents.
    os.makedirs(d, exist_ok=True)
def github_fetch(api, **kwargs):
    """Make a GitHub API call.

    Args:
        api: API path appended to ``GITHUB_BASE_URL`` (e.g. ``'/user/repos'``).
        **kwargs: Sent as query-string parameters of the request.

    Returns:
        The ``requests.Response`` of the GET request.
    """
    token = get_token()
    # GitHub no longer accepts the token as an ``access_token`` query
    # parameter (and doing so leaks it into URLs); send it in the
    # Authorization header instead. Without a token the call is anonymous.
    headers = {'Authorization': f'token {token}'} if token else {}
    return requests.get(
        f'{GITHUB_BASE_URL}{api}',
        params=kwargs,
        headers=headers,
    )
def github_get(api, **kwargs):
    """Call the GitHub API once and return the decoded JSON body."""
    response = github_fetch(api, **kwargs)
    return response.json()
def github_list(api, **kwargs):
    """Fetch a list of values from GitHub, handling pagination.

    Requests successive pages (``page=1, 2, ...``) until a response no
    longer carries a ``next`` link, concatenating the JSON lists of all
    pages.

    Args:
        api: API path passed through to ``github_fetch``.
        **kwargs: Extra query parameters passed through to ``github_fetch``.

    Returns:
        A single list with the items of every page, in page order.

    Raises:
        requests.HTTPError: If any page comes back with an error status.
    """
    data = []
    page = 1
    while True:
        response = github_fetch(api, page=page, **kwargs)
        # An error body is a JSON object, not a list; extending with it
        # would silently add its keys to the results, so fail loudly.
        response.raise_for_status()
        data.extend(response.json())
        if 'next' not in response.links:
            return data
        page += 1
def get_my_repos():
    """Return all repos I have access to, cached in /tmp/repos.json.

    When the cache file exists its contents are returned as-is; otherwise
    the list is fetched from ``/user/repos`` and written to the cache
    before being returned.
    """
    cache_path = '/tmp/repos.json'
    if not os.path.exists(cache_path):
        fetched = github_list('/user/repos')
        with open(cache_path, 'w') as out:
            json.dump(fetched, out)
        return fetched
    with open(cache_path, 'r') as src:
        return json.load(src)
def get_my_commits(repo):
    """Return my commits to ``repo``, cached under /tmp/commits.

    ``repo`` is the ``owner/name`` string; the cache file name replaces
    each ``/`` with ``__``. On a cache miss the commits authored by
    ``AUTHOR`` are fetched from GitHub and stored before being returned.
    """
    cache_name = repo.replace('/', '__')
    cache_path = f'/tmp/commits/{cache_name}.json'
    mkdir('/tmp/commits')
    if not os.path.exists(cache_path):
        fetched = github_list(f'/repos/{repo}/commits', author=AUTHOR)
        with open(cache_path, 'w') as out:
            json.dump(fetched, out)
        return fetched
    with open(cache_path, 'r') as src:
        return json.load(src)
def get_commit_details(repo, commits):
    """Return per-commit stats for ``commits``, read from the local clone.

    The result maps each commit's ``sha`` to the ``stats.total`` value that
    GitPython reports for it in the clone at ``/tmp/repos/<repo>``. Results
    are cached under /tmp/commit_details, with ``/`` in the repo name
    replaced by ``__``; a cache hit is returned without opening the clone.
    """
    cache_name = repo.replace('/', '__')
    cache_path = f'/tmp/commit_details/{cache_name}.json'
    mkdir('/tmp/commit_details')
    if os.path.exists(cache_path):
        with open(cache_path, 'r') as src:
            return json.load(src)

    checkout = git.Repo(f'/tmp/repos/{repo}')
    details = {}
    for entry in commits:
        details[entry['sha']] = checkout.commit(entry['sha']).stats.total
    with open(cache_path, 'w') as out:
        json.dump(details, out)
    return details
def clone_repo(repo):
    """Clone a repo from GitHub over SSH into ``/tmp/repos/<owner>/<name>``.

    Does nothing when the target directory already exists.

    Args:
        repo: Full repository name in ``owner/name`` form.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits non-zero; the
            captured stderr is attached to the exception.
    """
    owner = repo.split('/')[0]
    target = f'/tmp/repos/{repo}'
    mkdir('/tmp/repos')
    mkdir(f'/tmp/repos/{owner}')
    if os.path.exists(target):
        return
    # The remote must be the SSH form git@github.com:<owner>/<name>.git;
    # the previous literal was a mangled placeholder, not a valid host.
    subprocess.check_output(
        ['git', 'clone', f'git@github.com:{repo}.git', target],
        stderr=subprocess.PIPE,
    )
def filter_repos(repos):
    """Yield each repo whose ``full_name`` matches no IGNORED_REPOS pattern."""
    for candidate in repos:
        for pattern in IGNORED_REPOS:
            if fnmatch.fnmatch(candidate['full_name'], pattern):
                # Ignored: skip this repo without yielding it.
                break
        else:
            yield candidate
def main():
    """Tally and print the number of lines changed across all my repos."""
    stats_by_sha = {}

    # Repos to inspect, minus the ignored ones.
    repos = list(filter_repos(get_my_repos()))

    # Progress bar whose description tracks the current step.
    progress = tqdm.tqdm(repos)
    for repo in progress:
        repo_name = repo['full_name']

        progress.set_description(f'{repo_name}: Fetching commits')
        my_commits = get_my_commits(repo_name)

        # Only repos I have committed to need cloning and analysis.
        if my_commits:
            progress.set_description(f'{repo_name}: Cloning repo')
            clone_repo(repo_name)

            progress.set_description(f'{repo_name}: Loading stats')
            details = get_commit_details(repo_name, my_commits)

            # Merge into the global sha -> stats mapping.
            stats_by_sha.update(details)

    # Add up the 'lines' figure of every commit collected above.
    lines_modified = 0
    for stats in stats_by_sha.values():
        lines_modified += stats['lines']
    print(f'I have changed {lines_modified:,} lines in my GitHub lifetime.')


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment