Last active
September 27, 2024 13:18
-
-
Save spraints/f8694f22ff3d40c73f45b9728714f508 to your computer and use it in GitHub Desktop.
Find which parts of your Git repository are growing the fastest
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#/ Usage: ruby recent-growth.rb [OPTIONS]
#/   --git-dir <git_dir>
#/   --since <date>
#/   --until <date>
#/   --max-count <count>
#/   --per tree|extension
#/   --list-all (in the summary, show all trees / extensions, not just the top 10)
#/   --all | --branches | --tags | --remotes | <commit>
#/
#/ Example:
#/   $ git clone https://gist.github.com/f8694f22ff3d40c73f45b9728714f508 repo-growth
#/   $ git clone --mirror https://github.com/me/my-repo
#/   $ cd my-repo
#/   $ ruby ../repo-growth/recent-growth.rb --since 1.week.ago --branches --tags
# True when the DEBUG environment variable is set to any non-empty value.
DEBUG = !ENV["DEBUG"].to_s.empty?
# Runs the pipeline
#
#   git rev-list --boundary <filters> <commits> |
#     git rev-list --stdin --objects --object-names |
#     git cat-file --batch-check=...
#
# and hands every reported object to +mode+, then prints the summary.
#
# mode:     collector responding to add(name:, oid:, type:, size:) and
#           finish(list_all:)
# git_dir:  path passed to `git -C`, or nil to use the current directory
# filters:  extra options for the first rev-list (e.g. --since, --max-count)
# commits:  revisions / ref selectors for the first rev-list
# list_all: forwarded to mode.finish
#
# Returns whatever mode.finish returns.
def main(mode:, git_dir:, filters:, commits:, list_all: false)
  git_dir_args = git_dir ? ["-C", git_dir] : []
  boundary_cmd = [
    "git", *git_dir_args, "rev-list",
    "--boundary",
    *filters,
    *commits,
  ]
  rev_list_cmd = [
    "git", *git_dir_args, "rev-list",
    "--stdin",
    "--objects", "--object-names",
  ]
  cat_file_cmd = [
    "git", *git_dir_args, "cat-file",
    "--batch-check=%(objectname) %(objecttype) %(objectsize:disk)",
  ]
  if DEBUG
    puts "$ #{boundary_cmd.join(" ")} |"
    puts " #{rev_list_cmd.join(" ")} |"
    puts " #{cat_file_cmd.join(" ")}"
  end

  IO.popen(boundary_cmd) do |boundary|
    rev_list_stdin, rev_list_in = IO.pipe
    IO.popen(rev_list_cmd, in: rev_list_stdin) do |rev_list|
      begin
        # The child owns its own copy of the read end. Keeping ours open would
        # stop writes from ever failing with EPIPE if the child dies, and the
        # writer could then block forever on a full pipe.
        rev_list_stdin.close
        cat_file_stdin, cat_file_in = IO.pipe
        IO.popen(cat_file_cmd, in: cat_file_stdin) do |cat_file|
          begin
            cat_file_stdin.close
            pump_objects(
              mode: mode,
              boundary: boundary,
              rev_list: rev_list,
              rev_list_in: rev_list_in,
              cat_file: cat_file,
              cat_file_in: cat_file_in
            )
          ensure
            # Must happen before IO.popen waits for the child, otherwise an
            # exception above would leave cat-file waiting on stdin forever.
            cat_file_in.close unless cat_file_in.closed?
          end
        end
      ensure
        rev_list_in.close unless rev_list_in.closed?
      end
    end
  end

  # show the stats
  mode.finish(list_all: list_all)
end

# Shuttles data between the three git processes until each output hits EOF:
# boundary output -> rev-list stdin, rev-list output -> cat-file stdin,
# cat-file output -> mode.add.
def pump_objects(mode:, boundary:, rev_list:, rev_list_in:, cat_file:, cat_file_in:)
  names = {}
  rs = [boundary, rev_list, cat_file]
  until rs.empty?
    ready, = IO.select(rs)
    ready.each do |f|
      case f
      when boundary
        begin
          # rev-list --boundary outputs all the commits it finds as
          # "<oid>" and the boundaries as "-<oid>". We want to feed this
          # back into the next rev-list as "<oid>" and "^<oid>" so that
          # we can see which objects come from just the new commits.
          oid = boundary.readline
          rev_list_in.print(oid.tr("-", "^"))
        rescue EOFError, Errno::EPIPE
          # EPIPE: the downstream rev-list has gone away; stop feeding it.
          rs.delete(boundary)
          rev_list_in.close
        end
      when rev_list
        begin
          # capture the object name, if any, so we can use it later, and
          # send the oid along to cat-file.
          oid, name = rev_list.readline.strip.split(" ", 2)
          names[oid] = name if name
          cat_file_in.puts oid
        rescue EOFError, Errno::EPIPE
          # EPIPE: cat-file has gone away; stop feeding it.
          rs.delete(rev_list)
          cat_file_in.close
        end
      when cat_file
        begin
          # process the output.
          oid, type, size = cat_file.readline.strip.split(" ", 3)
          mode.add(
            name: names.delete(oid),
            oid: oid,
            type: type,
            size: size.to_i
          )
        rescue EOFError
          rs.delete(cat_file)
        end
      end
    end
  end
end
# git rev-list --objects --no-object-names --since-as-filter=1.week.ago | git cat-file --batch-check='%(objectsize:disk)' | perl -lne '$sum += $_ for /-?\d+/g; END { print $sum }'
# next step is to write a script that figures out growth per dir, file extension, object type, etc.
# Collector that sums the size of every object it is given.
class Total
  def initialize
    @total_size = 0
  end

  # size: number of bytes to add to the running total. Other keywords are
  # accepted and ignored.
  def add(size:, **)
    @total_size += size
  end

  # Prints the accumulated total. list_all is accepted for interface
  # compatibility with the other collectors and has no effect here.
  def finish(list_all: false)
    puts "Total growth:"
    printf " %15d bytes\n", @total_size
  end
end
# Collector that counts how many objects of each type it is given.
class CountPerType
  def initialize
    @counts = Hash.new(0)
  end

  # type: object type string used as the bucket. Other keywords are accepted
  # and ignored.
  def add(type:, **)
    @counts[type] += 1
  end

  # Prints one line per type, ordered by type name. list_all is accepted for
  # interface compatibility with the other collectors and has no effect here.
  def finish(list_all: false)
    puts "New objects:"
    @counts.sort_by { |type, _| type }.each do |type, count|
      printf " %15d %s\n", count, type
    end
  end
end
# Collector that groups object sizes by the first path component of the
# object's name. Objects without a name are reported under "(other)".
class PerTree
  def initialize
    @toplevel = 0
    @subdirs = Hash.new(0)
  end

  # size: number of bytes to add.
  # name: path of the object, or nil when the object has none.
  # Other keywords are accepted and ignored.
  def add(size:, name:, **)
    # An empty name has no first component; treat it like a missing name
    # rather than creating a nil bucket.
    if name.nil? || name.empty?
      @toplevel += size
    else
      top_level_tree, = name.split("/", 2)
      @subdirs[top_level_tree] += size
    end
  end

  # Prints the buckets ordered by size, largest first. Only the ten largest
  # are shown unless list_all is true.
  def finish(list_all: false)
    # Build the report from a copy so the synthetic row is not stored in the
    # collector's state. The symbol key cannot collide with a real directory
    # called "(other)", whose key would be a String.
    report = @subdirs.merge(:"(other)" => @toplevel)
    puts "Growth per tree:"
    sorted = report.sort_by { |_, size| -size }
    sorted = sorted.take(10) unless list_all
    sorted.each do |subdir, size|
      printf " %15d bytes - %s\n", size, subdir
    end
  end
end
# Collector that groups blob sizes by file extension.
class PerExtension
  def initialize
    @extensions = Hash.new(0)
  end

  # size: number of bytes to add.
  # name: path of the object, or nil when the object has none.
  # type: object type; only "blob" objects with a name are counted.
  # Other keywords are accepted and ignored.
  def add(size:, name:, type:, **)
    return if name.nil? || type != "blob"

    ext = File.extname(name)
    # Names without an extension share a single "(none)" bucket.
    key = ext.empty? ? :"(none)" : ext
    @extensions[key] += size
  end

  # Prints the buckets ordered by size, largest first. Only the ten largest
  # are shown unless list_all is true.
  def finish(list_all: false)
    puts "Growth per extension:"
    sorted = @extensions.sort_by { |_, size| -size }
    sorted = sorted.take(10) unless list_all
    sorted.each do |ext, size|
      printf " %15d bytes - %s\n", size, ext
    end
  end
end
# Collector that groups object sizes by object type.
class PerType
  def initialize
    @types = Hash.new(0)
  end

  # size: number of bytes to add.
  # type: object type string used as the bucket.
  # Other keywords are accepted and ignored.
  def add(size:, type:, **)
    @types[type] += size
  end

  # Prints the buckets ordered by type name. Only the first ten are shown
  # unless list_all is true.
  def finish(list_all: false)
    puts "Growth per object type:"
    sorted = @types.sort_by { |type, _| type }
    sorted = sorted.take(10) unless list_all
    sorted.each do |type, size|
      printf " %15d bytes - %s\n", size, type
    end
  end
end
# Collector that fans every call out to a list of other collectors, in the
# order they were given.
class Multi
  # collectors: objects responding to add(**opts) and finish(list_all:).
  def initialize(*collectors)
    @collectors = collectors
  end

  # Forwards the keyword arguments unchanged to every collector.
  def add(**opts)
    @collectors.each { |collector| collector.add(**opts) }
  end

  # Asks every collector to print its summary.
  def finish(list_all: false)
    @collectors.each { |collector| collector.finish(list_all: list_all) }
  end
end
# Prints the usage text to stdout and exits with status 1.
#
# The usage text is every line of the running script that starts with the
# two-character usage marker (hash, slash); the first three characters of
# each such line are dropped. The script is read directly instead of through
# a shell pipeline, so script paths containing spaces or shell metacharacters
# work.
def usage_and_exit
  begin
    File.foreach($PROGRAM_NAME) do |line|
      puts line[3..-1] if line.start_with?("#/")
    end
  rescue SystemCallError => e
    warn "cannot read usage from #{$PROGRAM_NAME}: #{e.message}"
  end
  exit 1
end
# Command-line entry point: parse ARGV, then run the report.

# Default report when --per is not given.
mode = Multi.new(
  Total.new,
  CountPerType.new,
  PerType.new,
  PerExtension.new
)
git_dir = nil
filters = []
commits = []
list_all = false

until ARGV.empty?
  case arg = ARGV.shift
  when /^--since/, /^--max-count/, /^--until/
    filters << arg
    # "--since=<date>" carries its own value; "--since <date>" needs the next
    # argument. A missing value is a usage error, not a nil in the git command.
    unless arg.include?("=")
      usage_and_exit if ARGV.empty?
      filters << ARGV.shift
    end
  when "-n"
    usage_and_exit if ARGV.empty?
    filters << arg
    filters << ARGV.shift
  when "--all", "--branches", "--tags", "--remotes"
    commits << arg
  when "--git-dir"
    usage_and_exit if ARGV.empty?
    git_dir = ARGV.shift
  when "--per"
    case ARGV.shift
    when "tree"
      mode = PerTree.new
    when /^ext/
      mode = PerExtension.new
    else
      usage_and_exit
    end
  when "--list-all"
    list_all = true
  when /^-/
    usage_and_exit
  else
    commits << arg
  end
end

# With no explicit revisions, look at everything.
commits << "--all" if commits.empty?

main(
  mode: mode,
  git_dir: git_dir,
  filters: filters,
  commits: commits,
  list_all: list_all
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment