Created
December 3, 2018 23:30
-
-
Save panzerama/b46a977ae1acf31a5a941bab7db00a70 to your computer and use it in GitHub Desktop.
A Python script for visualizing the size of objects in your git repo.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
Counts file size of objects in your repository's packfile and sorts by | |
buckets. Defaults to buckets of 1M. | |
Requires third-party libraries matplotlib and numpy | |
Author: Jason Drew Panzer | |
""" | |
from matplotlib import pyplot as plt | |
from subprocess import run | |
import numpy as np | |
def packfile_histogram(bucket_size_mb=1):
    """Plot a histogram of the blob sizes stored in the repository's packfiles.

    Runs ``git verify-pack -v`` on every pack index under
    ``.git/objects/pack`` (relative to the current working directory), keeps
    the blob entries, converts each size from bytes to megabytes
    (1024 ** 2 bytes) and shows a bar chart of how many blobs fall into each
    fixed-width size bucket.

    The first bucket (blobs smaller than ``bucket_size_mb``) is left out of
    the visible axis range so that the larger objects stay readable; when no
    blob reaches the second bucket the full chart is shown instead.

    Args:
        bucket_size_mb: Width of every histogram bucket in megabytes.
            Defaults to 1.

    Raises:
        ValueError: If ``bucket_size_mb`` is not positive.
    """
    if bucket_size_mb <= 0:
        raise ValueError("bucket_size_mb must be positive")

    # git verify-pack for all pack index files, filtered by blobs and sorted by size in reverse order.
    # The shell is required for the glob and the pipes; the command is a fixed string.
    objects = run('git verify-pack -v .git/objects/pack/pack-*.idx | grep blob | grep -v chain | sort -k3nr', shell=True, capture_output=True, encoding='utf-8')

    # The object size is the third whitespace-separated field of each line;
    # lines that do not have an integer there are skipped instead of crashing.
    object_sizes = []
    for line in objects.stdout.splitlines():
        fields = line.split()
        if len(fields) >= 3 and fields[2].isdigit():
            object_sizes.append(int(fields[2]) / (1024 ** 2))

    if not object_sizes:
        # Nothing to plot (e.g. not a git repository, or no packfiles yet).
        import sys
        print("No packed blob objects found under .git/objects/pack; nothing to plot.", file=sys.stderr)
        return

    # Build explicit bucket edges 0, b, 2b, ... so every bucket really is
    # bucket_size_mb wide and the largest blob is always covered.
    largest = max(object_sizes)
    bucket_count = max(1, int(np.ceil(largest / bucket_size_mb)))
    bucket_edges = np.arange(bucket_count + 1, dtype=float) * bucket_size_mb
    # Guard against floating-point rounding leaving the largest blob just past the last edge.
    bucket_edges[-1] = max(bucket_edges[-1], largest)
    object_size_hist, object_size_bins = np.histogram(object_sizes, bins=bucket_edges)

    # Show off that histogram: bars start at each bucket's left edge and span its width.
    plt.bar(object_size_bins[:-1], object_size_hist, width=np.diff(object_size_bins), align='edge')
    plt.xlabel("size in mb")
    plt.ylabel("number of objects")

    # Hide the first bucket only when something exists beyond it; otherwise the
    # axis limits would collapse to an empty range.
    beyond_first = object_size_hist[1:]
    if beyond_first.size and beyond_first.max() > 0:
        plt.xlim(bucket_size_mb, object_size_bins[-1])
        plt.ylim(0, beyond_first.max())
    plt.show()
# Script entry point: draw the histogram only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    packfile_histogram()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment