@nh13
Created February 26, 2018 20:08
Summarize the sizes of S3 path prefixes
#!/usr/bin/env python3
import sys
import argparse
from pathlib import Path
from collections import OrderedDict
import math
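
# Unit names used in "aws s3 ls --human-readable" output, mapped to their size in bytes.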
units_and_size = [
    ('Bytes', 1),
    ('KiB', 1024),
    ('MiB', 1024**2),
    ('GiB', 1024**3),
    ('TiB', 1024**4),
    ('PiB', 1024**5),
]
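# Also accept the singular 'Byte' when looking up unit names.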
to_bytes = OrderedDict(units_and_size + [('Byte', 1)])
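
# Format a byte count as a human-readable string, e.g. 1536 -> "1.5 KiB".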
def bytes_to_human(b):
    idx = 0
    # Scale down until the value fits the largest available unit.
    while 1024 <= b and idx < len(units_and_size) - 1:
        idx += 1
        b = b / 1024.0
    return f'{b:.1f} {units_and_size[idx][0]}'
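
# Print the size report for one bucket: prefixes grouped by depth (deepest first),
# followed by the bucket's total size.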
def print_bucket_data(bucket, bucket_data, total_size, min_size=1, line_length=60):
    def print_str(s):
        # Center the string in a line of dashes of the given total length.
        num_dashes = (line_length - len(s) - 2) / 2
        print('-' * math.floor(num_dashes) + f' {s} ' + '-' * math.ceil(num_dashes))

    print('-' * line_length)
    print_str(bucket)
    print('-' * line_length)
    print()

    def sort_key(item):
        '''Longer paths first, and then smaller sizes first.'''
        k, v = item
        return (-len(Path(k).parents), v)

    level = None
    for k, v in sorted(bucket_data.items(), key=sort_key):
        if v < min_size:
            continue
        # Start a new section whenever the prefix depth decreases.
        if level is None or level > len(k.parents):
            if level is not None:
                print('')
            level = len(k.parents)
            print_str(str(level))
        print(f'{bytes_to_human(v):<12s} : s3://{bucket}/{k}')
    print(f'{bytes_to_human(total_size):<12s} : s3://{bucket}')
    print()
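
# Parse the command line, aggregate object sizes into every parent prefix for each
# listing file, and print one report per bucket.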
def main(args=None):
    if args is None:
        args = sys.argv[1:]
    parser = argparse.ArgumentParser()
    parser.add_argument('-b', '--bucket-file', help='One or more files containing the output of aws s3 ls --summarize --human-readable --recursive.', nargs='+', type=Path, required=True)
    parser.add_argument('-d', '--max-depth', help='The maximum depth to display', default=math.inf, type=int)
    parser.add_argument('-m', '--min-size', help='The minimum size to display', default='1 Bytes', type=str)
    parser.add_argument('-s', '--stop-after', help='Stop after this number of objects', default=math.inf, type=int)
    args = parser.parse_args(args)

    max_depth = args.max_depth
    min_size_value, min_size_units = args.min_size.split()
    min_size = float(min_size_value) * to_bytes[min_size_units]

    for bucket_path in args.bucket_file:
        # The bucket name is taken from the listing file's name, minus its extension.
        bucket = bucket_path.with_suffix('').name
        with bucket_path.open('r') as fh:
            bucket_data = {}
            total_size = 0
            for i, line in enumerate(fh):
                line = line.rstrip('\r\n')
                # A blank line marks the start of the --summarize totals; stop there.
                if line == '':
                    break
                try:
                    date, time, value, units, path = line.split(maxsplit=4)
                except ValueError as e:
                    sys.stderr.write(f'Error: on line {i+1}: {line}\n')
                    raise e
                units = to_bytes[units]
                size = float(value) * units
                # Attribute the object's size to the object itself and every parent prefix.
                paths = [path] + list(Path(path).parents)[:-1]
                total_size += size
                if 0 < max_depth and max_depth != math.inf:
                    # Keep only the shallowest max_depth entries.
                    start_idx = len(paths) - max_depth
                    if start_idx < 0:
                        start_idx = 0
                    paths = paths[start_idx:]
                for path in paths:
                    path = Path(path)
                    if path not in bucket_data:
                        bucket_data[path] = size
                    else:
                        bucket_data[path] = bucket_data[path] + size
                if args.stop_after <= i:
                    break
        print_bucket_data(bucket=bucket, bucket_data=bucket_data, total_size=total_size, min_size=min_size)
if __name__ == '__main__':
    main()
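
For reference, a minimal usage sketch (the bucket name, listing file name, and script file name below are illustrative, not part of the gist; the script derives the bucket name from the listing file's name, so the listing for my-bucket is saved as my-bucket.txt):

# Capture a recursive, human-readable listing of the bucket, including the summary.
aws s3 ls s3://my-bucket --recursive --human-readable --summarize > my-bucket.txt

# Summarize prefix sizes down to two levels, hiding prefixes smaller than 1 GiB.
python3 summarize_s3_prefixes.py --bucket-file my-bucket.txt --max-depth 2 --min-size '1 GiB'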