Skip to content

Instantly share code, notes, and snippets.

@bogdanp05
Last active November 16, 2018 10:45
Show Gist options
  • Save bogdanp05/e07f894d71aeb724d377259d5b039653 to your computer and use it in GitHub Desktop.
Save bogdanp05/e07f894d71aeb724d377259d5b039653 to your computer and use it in GitHub Desktop.
List the number of files and the total size of files for each file type in a directory. Example: "python file_types.py ./dir_path"
import os
from os.path import join, getsize
import argparse
import operator
def parse_args(argv=None):
    """Parse the command-line options of the file-type counter.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process arguments, which is
            the behaviour callers without arguments have always had.

    Returns:
        The ``argparse.Namespace`` with two attributes: ``path`` (str, the
        directory to scan) and ``threshold`` (int, defaults to 10).
    """
    parser = argparse.ArgumentParser(
        description='Count the number of files for each file type in a directory.')
    parser.add_argument('path', metavar='p', type=str, help='path of the directory')
    # '-threshold' is the spelling existing users rely on; '--threshold' is
    # accepted as the conventional double-dash alias for the same option.
    parser.add_argument('-threshold', '--threshold', metavar='t', type=int, default=10,
                        help='file count threshold (default: 10)')
    return parser.parse_args(argv)
def count_files(path):
    """Walk *path* recursively and tally files per file extension.

    Every file found is included in the printed grand total, but only files
    that have an extension are recorded in the result.

    Args:
        path: Directory to scan (passed to ``os.walk``).

    Returns:
        A dict mapping the extension without its leading dot (e.g. ``"py"``)
        to a ``(number_of_files, total_size_in_bytes)`` tuple.

    Side effects:
        Prints ``total files: N`` where N counts every file visited,
        including those without an extension.
    """
    extension_dict = {}
    all_files = 0
    for root, _dirs, files in os.walk(path):
        for filename in files:
            all_files += 1
            # splitext yields the real (last) extension: "a.tar.gz" -> ".gz".
            # It also returns "" for dotfiles such as ".bashrc", so those are
            # not mistaken for a file type. [1:] drops the leading dot.
            extension = os.path.splitext(filename)[1][1:]
            if not extension:
                continue
            try:
                size = getsize(join(root, filename))
            except OSError:
                # e.g. a dangling symlink or a file removed mid-walk: still
                # count the file, but contribute no bytes instead of aborting.
                size = 0
            num, total_sizes = extension_dict.get(extension, (0, 0))
            extension_dict[extension] = (num + 1, total_sizes + size)
    print("total files: %d" % all_files)
    return extension_dict
def trim_dict(extension_dict, threshold):
    """Keep only the file types whose file count is above *threshold*.

    Args:
        extension_dict: Mapping of file type to a sequence whose first item
            is the number of files of that type.
        threshold: File types with a count less than or equal to this value
            are dropped (the comparison is strictly greater-than).

    Returns:
        A new dict with the surviving entries, in their original order; the
        input mapping is not modified.
    """
    return {
        file_type: stats
        for file_type, stats in extension_dict.items()
        if stats[0] > threshold
    }
def print_dict(unsorted_dict):
    """Print one line per file type, largest file count first.

    Args:
        unsorted_dict: Mapping of file type to a
            ``(number_of_files, total_size_in_bytes)`` tuple.

    Entries are ordered by their value tuple in descending order, i.e. by
    file count and then by total size. A header line followed by a blank
    line is printed before the entries.
    """
    entries = list(unsorted_dict.items())
    # Each entry looks like ('c', (991, 21108147)); order by the stats tuple.
    entries.sort(key=operator.itemgetter(1), reverse=True)
    print("file type - number of files - size of files(bytes)\n")
    for file_type, stats in entries:
        print("%s - %d - %d" % (file_type, stats[0], stats[1]))
def main():
    """Script entry point: count, filter and print file types for a directory.

    Reads the directory path and the count threshold from the command line,
    tallies files per type, drops the types whose file count does not exceed
    the threshold, and prints the remaining ones.
    """
    # Parse the command line once and reuse the result for both options.
    args = parse_args()
    output_dict = count_files(args.path)
    output_dict = trim_dict(output_dict, args.threshold)
    print_dict(output_dict)


# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment