Last active
July 29, 2025 09:11
-
-
Save mloskot/a648d11468f56e3b7a4f3ffb5fe7a9c0 to your computer and use it in GitHub Desktop.
Count Git LFS objects and their sizes, total and per file type
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Count Git LFS objects per file type and calculate their total sizes.

Runs ``git lfs ls-files --long --size`` in the current working directory,
groups the listed objects by file extension and prints, for every extension,
the number of objects and their cumulative size, followed by a grand total.
"""
import os
import re
import subprocess
import sys
import tempfile

# Decimal multipliers for the size units found in the listing.
TO_BYTES = {
    "B": 1,
    "KB": 10**3,
    "MB": 10**6,
    "GB": 10**9,
    "TB": 10**12,
    "PB": 10**15,
}

# Sizes are parsed with decimal multipliers, so they are reported with the
# same decimal megabyte to keep the "MB" label truthful.
MEGABYTE = 10**6

# One listing line looks like "<oid> <marker> <path> (<number> <unit>)".
# The marker is "*" or "-"; the path is matched greedily so that paths which
# themselves contain " - ", spaces or parentheses are kept intact and only
# the trailing "(<number> <unit>)" group is taken as the size.
_LINE_RE = re.compile(
    r'^(?P<oid>\S+) [-*] (?P<path>.+) '
    r'\((?P<size>[0-9]+(?:\.[0-9]+)?) (?P<unit>[A-Za-z]+)\)\s*$'
)


def parse_lfs_line(line):
    """Parse one line of ``git lfs ls-files --long --size`` output.

    Returns a ``(extension, size_in_bytes)`` tuple, or ``None`` when the line
    does not have the expected shape or uses an unknown size unit.
    """
    match = _LINE_RE.match(line)
    if match is None:
        return None
    unit = match.group('unit')
    if unit not in TO_BYTES:
        return None
    _, ext = os.path.splitext(match.group('path'))
    return ext, int(float(match.group('size')) * TO_BYTES[unit])


def summarize(lines):
    """Aggregate listing lines into ``{ext: {'count': int, 'size': int}}``.

    Blank lines are ignored; lines that cannot be parsed are reported on
    stderr and skipped instead of aborting the whole run.
    """
    total = {}
    for line in lines:
        if not line.strip():
            continue
        parsed = parse_lfs_line(line)
        if parsed is None:
            sys.stderr.write('skipping unrecognized line: {0}\n'.format(line.rstrip()))
            continue
        ext, size = parsed
        entry = total.setdefault(ext, {'count': 0, 'size': 0})
        entry['count'] += 1
        entry['size'] += size
    return total


def summary_lines(total):
    """Return the report lines to print for an aggregated ``total`` mapping."""
    if not total:
        return ['no summary to print']
    report = ['Git LFS objects summary:']
    for ext, stats in total.items():
        report.append('{}:\tcount: {}\tsize: {:.2f} MB'.format(
            ext, stats['count'], stats['size'] / float(MEGABYTE)))
    count = sum(stats['count'] for stats in total.values())
    size = sum(stats['size'] for stats in total.values()) / float(MEGABYTE)
    report.append('Total:\tcount: {}\tsize: {:.2f} MB'.format(count, size))
    return report


def list_lfs_files():
    """Run ``git lfs ls-files --long --size`` and return its output lines.

    The output is read straight from the child process, so no temporary file
    is created in the working directory. If git cannot be started, an empty
    listing is returned; if it exits with an error, whatever it printed
    before failing is still used (best effort, as before).
    """
    command = ['git', 'lfs', 'ls-files', '--long', '--size']
    try:
        output = subprocess.check_output(command, universal_newlines=True)
    except subprocess.CalledProcessError as err:
        output = err.output or ''
    except OSError as err:
        sys.stderr.write('failed to run git: {0}\n'.format(err))
        output = ''
    return output.splitlines()


def main():
    """Collect the Git LFS listing and print the per-extension summary."""
    print('Collecting sizes of Git LFS objects')
    lines = list_lfs_files()
    print('Calculating total size per file type')
    for text in summary_lines(summarize(lines)):
        print(text)


if __name__ == '__main__':
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# List Git LFS objects as "<size in bytes> <path> <oid>", largest first.
#
# `git lfs ls-files --long --size` prints one line per object in the form
#   <oid> <marker> <path> (<number> <unit>)
# with a human-friendly size. The awk program below converts that size to
# bytes by multiplying the full decimal number by the unit (so "1.5 MB"
# becomes 1500000), and takes the size from the END of the line so that
# paths containing spaces or parentheses are preserved unchanged.
git lfs ls-files --long --size \
    | awk '
        BEGIN {
            mult["B"] = 1
            mult["KB"] = 1000
            mult["MB"] = 1000000
            mult["GB"] = 1000000000
            mult["TB"] = 1000000000000
        }
        {
            oid = $1
            rest = $0
            # Drop the leading "<oid> <marker> " prefix.
            sub(/^[^ ]+ [-*] /, "", rest)
            # Locate the trailing " (<number> <unit>)" group.
            if (!match(rest, / \([0-9.]+ [A-Z]+\)$/))
                next
            path = substr(rest, 1, RSTART - 1)
            split(substr(rest, RSTART + 2, RLENGTH - 3), size, " ")
            if (!(size[2] in mult))
                next
            printf "%.0f %s %s\n", size[1] * mult[size[2]], path, oid
        }' \
    | sort -k 1 -n -r \
    | uniq
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Very inspiring.
I used `sort --key 1 --human-numeric-sort --reverse` to avoid the conversion (it requires removing the space between `$4` and `$5`, which is OK), and `git lfs ls-files --size $@` to be able to pass other parameters such as `--all` or `--long` (if need be).