Last active
January 3, 2022 09:40
-
-
Save gbin/4998909 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# | |
# This script helps you estimate the potential disk-space savings
# offered by the new ext4 inline data feature available on
# Linux kernels 3.8.0+:
# "The new Inline Data Support feature allows Ext4 to store files that only consist of a few bytes together with the inode to save storage space and accelerate access" | |
# see http://www.h-online.com/open/features/What-s-new-in-Linux-3-8-1804240.html for details. | |
# | |
# Just run it on your ext4 mountpoints and it will give you the trade-off
# for all your files depending on the inode size you choose.
# | |
# To get your current inode size you can do:
# $ tune2fs -l /dev/sda | grep Inode | |
# Inode count: 15040512 | |
# Inodes per group: 8192 | |
# Inode blocks per group: 512 | |
# Inode size: 256 | |
# | |
# You can set the inode size at creation time with : | |
# mkfs.ext4 -I inode-size /dev/... | |
# | |
import os | |
from sys import argv, stdout | |
def find_mount_point(path):
    """Return the closest mount point containing *path*.

    *path* is made absolute first; then it and each of its successive parent
    directories are tested with ``os.path.ismount`` until one of them is a
    mount point, which is returned.
    """
    candidate = os.path.abspath(path)
    while True:
        if os.path.ismount(candidate):
            return candidate
        # Not a mount point: climb one level and try again.
        candidate = os.path.dirname(candidate)
def compute_rough_file_count(path):
    """Return the number of inodes in use on the filesystem holding *path*.

    The value is the difference between the total inode count (``f_files``)
    and the free inode count (``f_ffree``) reported by ``os.statvfs``.
    """
    fs_stats = os.statvfs(path)
    total_inodes = fs_stats.f_files
    free_inodes = fs_stats.f_ffree
    return total_inodes - free_inodes
def progress_bar(iteration, total, prefix='', suffix='', decimals=1, length=100, fill='█'):
    """Redraw a single-line text progress bar on standard output.

    Args:
        iteration: Number of work items completed so far.
        total: Expected number of work items. A value of zero or less is
            rendered as a full bar instead of raising ``ZeroDivisionError``.
        prefix: Text printed before the bar.
        suffix: Text printed after the percentage.
        decimals: Number of decimal places shown in the percentage.
        length: Width of the bar in characters.
        fill: Character used for the completed part of the bar.

    The displayed progress is clamped to the 0-100 % range so that an
    under-estimated *total* cannot make the bar grow past *length*.
    A newline is emitted once ``iteration == total`` so the finished bar
    stays on screen.
    """
    if total > 0:
        # Clamp so the bar never under- or overflows its width.
        done = min(max(iteration, 0), total)
        ratio = done / float(total)
        # Integer arithmetic keeps the cell count free of float rounding.
        filled_length = int(length * done // total)
    else:
        # Nothing to measure against: show the bar as complete.
        ratio = 1.0
        filled_length = length
    percent = f'{100 * ratio:.{decimals}f}'
    bar = fill * filled_length + '-' * (length - filled_length)
    stdout.write('\x1b[2K')  # ANSI "erase entire line" before redrawing
    print(f'{prefix} |{bar}| {percent}% {suffix}', end='\r')
    if iteration == total:
        print()
def get_all_ext4_mountpoints(mtab_path='/etc/mtab'):
    """List the mount points found in a mount table, split by filesystem type.

    Args:
        mtab_path: Path of the mount table to read. Each useful line is
            expected to hold whitespace-separated fields with the mount
            point in second position and the filesystem type in third.

    Returns:
        A tuple ``(ext4_mount_points, other_mount_points)`` where the first
        item is a list of the mount points whose type is ``ext4`` (in file
        order) and the second is a set of every other mount point.

    Raises:
        OSError: If *mtab_path* cannot be opened.
    """
    import re  # function-scope import: only this parser needs it

    def unescape(field):
        # Mount tables encode whitespace and backslashes inside a field as
        # three-digit octal escapes (e.g. "\040" for a space); decode them so
        # the result compares equal to real filesystem paths.
        return re.sub(r'\\([0-7]{3})', lambda m: chr(int(m.group(1), 8)), field)

    ext4mountedpaths = []
    other_mount_points = set()
    # surrogateescape keeps undecodable bytes instead of failing on them.
    with open(mtab_path, 'r', errors='surrogateescape') as f:
        for line in f:
            fields = line.split()
            if len(fields) < 3:
                continue  # blank or malformed line: nothing to record
            mount_point = unescape(fields[1])
            if fields[2] == 'ext4':
                ext4mountedpaths.append(mount_point)
            else:
                other_mount_points.add(mount_point)
    return ext4mountedpaths, other_mount_points
# Number of bytes subtracted from each candidate inode size to obtain the
# room that remains available for inline file data.
TYPICAL_OCCUPIED_SPACE_FOR_INODE = 124

# Candidate inode sizes, in bytes, for which the trade-off is reported.
EXPLORE_INODES_SIZES = (
    256,
    512,
    1024,
    2048,
    4096,
)
def _scan_file_sizes(mount_point, ext4_mount_points, other_mount_points, to_explore):
    """Walk *mount_point* and return the sizes of the entries located on ext4."""
    file_sizes = []
    inode_count = 0
    for root, _sub_folders, files in os.walk(mount_point):
        inode_count += 1
        # Resolved once per directory instead of once per file.
        root_mount = find_mount_point(root)
        if root_mount in other_mount_points:
            continue
        for entry in files:
            filename = os.path.join(root, entry)
            try:
                # An entry shares its directory's mount point unless it is
                # itself a mount point.
                if os.path.ismount(filename):
                    entry_mount = os.path.abspath(filename)
                else:
                    entry_mount = root_mount
                if entry_mount not in ext4_mount_points:
                    continue  # only consider files from mounted ext4 filesystems
                inode_count += 1
                # Anything that is not a regular file counts as an empty file.
                size = os.path.getsize(filename) if os.path.isfile(filename) else 0
            except OSError as e:
                print(entry)
                print(e)
                continue  # probably a system file
            file_sizes.append(size)
            if not (inode_count % 10000):
                progress_bar(inode_count, to_explore, prefix='Progress', suffix='Complete')
    return file_sizes


def _summarize_inline_fit(file_sizes):
    """Return ``(total_bytes, {inode_size: [files_fitting, wasted_bytes]})``."""
    results = {inode_size: [0, 0] for inode_size in EXPLORE_INODES_SIZES}
    total_occupied_space = 0
    for filesize in file_sizes:
        total_occupied_space += filesize
        for inode_size in EXPLORE_INODES_SIZES:
            spare = inode_size - TYPICAL_OCCUPIED_SPACE_FOR_INODE
            if filesize <= spare:
                results[inode_size][0] += 1
                results[inode_size][1] += spare - filesize
            else:
                # The file does not fit: the full empty space is wasted.
                results[inode_size][1] += spare
    return total_occupied_space, results


def main():
    """Command-line entry point: report the inline-data trade-off for a tree.

    Expects exactly one command-line argument, the directory to explore.
    For every candidate inode size in ``EXPLORE_INODES_SIZES`` it prints the
    share of files small enough to fit in the inode and the unused inode
    space relative to the total size of the files.

    Raises:
        SystemExit: With status -1 on a usage error, and -2 when there is
            nothing to analyse (no ext4 filesystem mounted, or the files
            found add up to zero bytes).
    """
    if len(argv) != 2:
        print('Syntax : ext4-inlinedata-calculator.py mountpoint')
        # SystemExit instead of the site-provided exit(), which is not
        # guaranteed to exist (e.g. under ``python -S``).
        raise SystemExit(-1)

    ext4_mount_points, other_mount_points = get_all_ext4_mountpoints()
    if not ext4_mount_points:
        print('No ext4 fs are mounted on your system.')
        # Every file would be skipped anyway: stop before walking the tree.
        raise SystemExit(-2)
    ext4_mount_points = set(ext4_mount_points)  # O(1) membership tests

    _, mount_point = argv
    print(f'Exploring {mount_point} ...')
    to_explore = compute_rough_file_count(mount_point)
    print(f'Number of inodes to explore {to_explore:,} ...')
    print()
    print()

    file_sizes = _scan_file_sizes(mount_point, ext4_mount_points, other_mount_points, to_explore)
    total_occupied_space, results = _summarize_inline_fit(file_sizes)

    if total_occupied_space == 0:
        print('No relevant file found')
        raise SystemExit(-2)

    print()
    print('== Results ==')
    print(f'Total file size {total_occupied_space:,}')
    print('Inode size\t\tfiles fit in\t\twasted inode space')
    total_nb = len(file_sizes)
    for size in EXPLORE_INODES_SIZES:
        nb, wasted = results[size]
        print(f'{size}\t\t\t{nb / float(total_nb):>7.2%}\t\t\t{float(wasted) / float(total_occupied_space):>15.8%}')
# Run the calculator only when this file is executed as a script.
if __name__ == '__main__':
    main()
I am uploading an updated version
Thank you.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
It would be great to adjust the script to use the actual size of sparse files.
Also please convert the script to Python3. Thanks.