Skip to content

Instantly share code, notes, and snippets.

@ksindi
Created March 19, 2018 19:39
Show Gist options
  • Save ksindi/3c7251aec6e6655eaa48f945a6a3bf28 to your computer and use it in GitHub Desktop.
Save ksindi/3c7251aec6e6655eaa48f945a6a3bf28 to your computer and use it in GitHub Desktop.
"""Plot histogram of repo age in months."""
import os
import pandas as pd
import numpy as np
# pip install pygithub
from github import Github
# for jupyter notebook
# import seaborn as sns
# sns.set()
# Mean length of a Gregorian month in days (365.2425 / 12). Used instead of
# np.timedelta64(1, 'M'), whose month unit is ambiguous and is rejected by
# newer pandas versions.
DAYS_PER_MONTH = 365.2425 / 12


def months_since(timestamps, now=None):
    """Return the number of months elapsed between each timestamp and ``now``.

    Args:
        timestamps: Iterable or Series of datetimes. Naive values are
            interpreted as UTC; timezone-aware values are converted to UTC.
        now: Reference instant. Defaults to the current UTC time. A naive
            value is interpreted as UTC.

    Returns:
        A float Series of fractional months (one month = ``DAYS_PER_MONTH``
        days). Timestamps later than ``now`` are clipped to 0 so that clock
        skew cannot produce negative ages.
    """
    stamps = pd.to_datetime(pd.Series(timestamps), utc=True)
    if now is None:
        now = pd.Timestamp.now(tz='UTC')
    else:
        now = pd.Timestamp(now)
        if now.tzinfo is None:
            now = now.tz_localize('UTC')
        else:
            now = now.tz_convert('UTC')
    elapsed = (now - stamps) / pd.Timedelta(days=DAYS_PER_MONTH)
    return elapsed.clip(lower=0)


def count_by_month(months_elapsed, max_months=12):
    """Count values per whole-month bin, plus an overflow bucket.

    Args:
        months_elapsed: Iterable or Series of fractional month counts.
        max_months: Number of one-month bins before the overflow bucket.

    Returns:
        An integer Series indexed by the labels ``'0'`` .. ``str(max_months - 1)``
        followed by ``'<max_months>+'``, in that order. Label ``'k'`` counts
        values in ``[k, k + 1)``. Anything that falls outside the bins
        (``>= max_months``, negative, or missing) lands in the overflow bucket.
    """
    labels = [str(month) for month in range(max_months)]
    overflow = '{}+'.format(max_months)
    # right=False makes the bins left-closed, so an age of exactly 0 is
    # counted under '0' instead of falling out of range.
    binned = pd.cut(
        pd.Series(months_elapsed),
        bins=list(range(max_months + 1)),
        labels=labels,
        right=False,
    )
    # Unbinned values come back as NaN; report them as the overflow bucket.
    binned = binned.cat.add_categories([overflow]).fillna(overflow)
    # value_counts() sorts by frequency; restore chronological order.
    return binned.value_counts().loc[labels + [overflow]]


if __name__ == "__main__":
    # Create a personal access token with repo access
    GITHUB_REPO_ACCESS_TOKEN = os.environ['GITHUB_REPO_ACCESS_TOKEN']
    gh = Github(GITHUB_REPO_ACCESS_TOKEN)
    # Get repo data
    tups = [
        (repo.owner.login, repo.name, repo.updated_at, repo.fork)
        for repo in gh.get_user().get_repos()
    ]
    # Create dataframe
    df = pd.DataFrame(tups, columns=['org', 'name', 'updated_at', 'fork'])
    # Add your repo filters (e.g. df.org == 'my_org')
    filters = ~df.fork.astype(bool)
    filtered_df = df[filters]
    # Get number of months elapsed (both sides handled in UTC)
    months_elapsed = months_since(filtered_df.updated_at)
    # Bin the data by month; 12 months or more go to the '12+' bucket
    value_counts = count_by_month(months_elapsed)
    total = int(value_counts.sum())
    # Normalize values to percentages; guard against an empty selection
    if total:
        normalized = 100 * (value_counts / total)
    else:
        normalized = value_counts.astype(float)
    # Already in chronological order; name kept from the original script
    reordered = normalized
    # Plot
    ax = reordered.plot.bar(
        title="GitHub repos histogram (total: {})".format(total))
    ax.set_xlabel("Months since last update")
    ax.set_ylabel("% of total")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment