Last active
July 29, 2025 09:11
-
-
Save mloskot/a648d11468f56e3b7a4f3ffb5fe7a9c0 to your computer and use it in GitHub Desktop.
Count Git LFS objects and their sizes, total and per file type
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| # Counts Git LFS objects, in total and per file type, and calculates their total sizes. | |
| import os | |
| import sys | |
| import tempfile | |
import subprocess

# Multipliers for the size units printed by `git lfs ls-files --size`.
# They are decimal (1 KB == 1000 B), so the summary below is also reported
# in decimal megabytes to stay consistent with the parsed input.
TO_BYTES = {
    "B": 1,
    "KB": 10**3,
    "MB": 10**6,
    "GB": 10**9,
    "TB": 10**12,
    "PB": 10**15,
}
BYTES_PER_MB = 10**6


def parse_lfs_line(line):
    """Parse one line of `git lfs ls-files --long --size` output.

    The expected shape is ``<oid> <marker> <path> (<number> <unit>)`` where
    the marker is ``*`` or ``-``.

    Returns a ``(extension, size_in_bytes)`` tuple, or ``None`` when the line
    does not have the expected shape or uses an unknown size unit.
    """
    text = line.strip()
    # The size is the LAST parenthesised group; splitting from the right keeps
    # paths that themselves contain " (" or " - " intact.
    head, sep, size_text = text.rpartition(' (')
    if not sep or not size_text.endswith(')'):
        return None
    fields = head.split(' ', 2)
    if len(fields) != 3 or fields[1] not in ('*', '-'):
        return None
    size_fields = size_text[:-1].split()
    if len(size_fields) != 2 or size_fields[1] not in TO_BYTES:
        return None
    try:
        value = float(size_fields[0])
    except ValueError:
        return None
    ext = os.path.splitext(fields[2])[1]
    # round() rather than int(): truncation turns e.g. 4.1 MB into 4099999.
    return ext, int(round(value * TO_BYTES[size_fields[1]]))


def summarize(lines):
    """Aggregate object count and byte size per file extension.

    Blank lines are ignored; lines that cannot be parsed are reported on
    stderr and skipped. Returns ``{ext: {'count': int, 'size': int}}``.
    """
    total = {}
    for line in lines:
        if not line.strip():
            continue
        parsed = parse_lfs_line(line)
        if parsed is None:
            sys.stderr.write('skipping unrecognized line: {}\n'.format(line.strip()))
            continue
        ext, size = parsed
        entry = total.setdefault(ext, {'count': 0, 'size': 0})
        entry['count'] += 1
        entry['size'] += size
    return total


def main():
    """Run `git lfs ls-files` and print a per-extension and total summary.

    Returns the process exit status: 0 on success, 1 when git could not be
    run or exited with an error.
    """
    print('Collecting sizes of Git LFS objects')
    try:
        # Capture the output directly instead of redirecting it into a
        # scratch file inside the working directory.
        output = subprocess.check_output(
            ['git', 'lfs', 'ls-files', '--long', '--size'],
            universal_newlines=True,
        )
    except (OSError, subprocess.CalledProcessError) as err:
        sys.stderr.write('git lfs ls-files failed: {}\n'.format(err))
        return 1

    print('Calculating total size per file type')
    total = summarize(output.splitlines())
    if not total:
        print('no summary to print')
        return 0

    print('Git LFS objects summary:')
    for ext in sorted(total):
        stats = total[ext]
        # float() keeps the division fractional on Python 2 as well.
        size_mb = float(stats['size']) / BYTES_PER_MB
        print('{}:\tcount: {}\tsize: {:.2f} MB'.format(ext, stats['count'], size_mb))
    count = sum(stats['count'] for stats in total.values())
    size_mb = float(sum(stats['size'] for stats in total.values())) / BYTES_PER_MB
    print('Total:\tcount: {}\tsize: {:.2f} MB'.format(count, size_mb))
    return 0


if __name__ == '__main__':
    sys.exit(main())
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Lists Git LFS objects, largest first, one per line as: <bytes> <path> <oid>
#
# `git lfs ls-files --long --size` prints lines shaped like
#   <oid> <marker> <path> (<number> <unit>)
# with human-friendly sizes. awk converts each size to a whole number of
# bytes (decimal multipliers: 1 KB = 1000 B) so that sort can compare them
# numerically. Only POSIX awk/sort features are used.
git lfs ls-files --long --size \
  | awk '
      BEGIN {
        mult["B"] = 1;    mult["KB"] = 1e3;  mult["MB"] = 1e6
        mult["GB"] = 1e9; mult["TB"] = 1e12; mult["PB"] = 1e15
      }
      NF >= 5 {
        # The last two fields are "(<number>" and "<unit>)".
        unit = $NF
        sub(/\)$/, "", unit)
        value = $(NF - 1)
        sub(/^\(/, "", value)
        if (!(unit in mult)) next

        # Cut the path out of the whole record instead of using $3, so that
        # paths containing spaces or parentheses survive unchanged.
        path = $0
        sub(/^[^ ]+ [*-] /, "", path)
        sub(/ \([^ ]+ [^ ]+\)$/, "", path)

        printf "%.0f %s %s\n", value * mult[unit], path, $1
      }' \
  | sort -k 1,1 -n -r \
  | uniq
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@massich Nice one, I wasn't aware of
--human-numeric-sort. Thanks.