Last active
February 7, 2022 20:57
-
-
Save neelabhg/f4ba33c618ae695015b1df0902523d7e to your computer and use it in GitHub Desktop.
Python script to scan a directory tree and output information for files and directories to a JSON file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""
Scan a directory tree and output information for files and directories to a JSON file.

To use:
    ./scan_dir_tree.py --help

Using pyenv:
    PYENV_VERSION=3.10.0 ./scan_dir_tree.py --help
"""
import sys | |
MIN_PYTHON = (3, 10) | |
if sys.version_info < MIN_PYTHON: | |
sys.exit("Python %s.%s or later is required." % MIN_PYTHON) | |
import os | |
import json | |
import logging | |
from argparse import ArgumentParser | |
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) | |
logger = logging.getLogger(__name__) | |
class FsItem:
    """One entry (file or directory) found while scanning a directory tree.

    Attributes are assigned in a fixed order because ``to_dict`` exposes them
    in insertion order, and that is the key order written to the JSON output.
    """

    def __init__(self, parent_path: str, name: str, kind: str) -> None:
        """Store the entry's kind and name and derive its path and lengths.

        Args:
            parent_path: Path of the directory that contains the entry.
            name: Name of the entry inside ``parent_path``.
            kind: Category label, stored unchanged.
        """
        self.kind = kind
        self.name = name
        self.path = os.path.join(parent_path, name)
        self.name_length = len(self.name)
        self.path_length = len(self.path)
        # Starts at 0; callers overwrite it for directories they have listed.
        self.num_direct_children = 0

    def to_dict(self) -> dict[str, object]:
        """Return the item's attributes as a new dict, in assignment order."""
        # Copy instead of handing out the live ``__dict__``: otherwise editing
        # the returned mapping would silently modify this instance.
        return dict(self.__dict__)
class FileItem(FsItem):
    """An ``FsItem`` whose ``kind`` is always "file"."""

    def __init__(self, parent_path: str, name: str) -> None:
        """Create the item for ``name`` located inside ``parent_path``."""
        super().__init__(parent_path, name, kind="file")
class DirectoryItem(FsItem):
    """An ``FsItem`` whose ``kind`` is always "directory"."""

    def __init__(self, parent_path: str, name: str) -> None:
        """Create the item for ``name`` located inside ``parent_path``."""
        super().__init__(parent_path, name, kind="directory")
def get_fs_items(root_directory_path: str) -> tuple[list[DirectoryItem], list[FileItem]]:
    """Walk ``root_directory_path`` and describe every directory and file in it.

    The root directory itself is included as the first directory item. Each
    directory that gets listed has ``num_direct_children`` set to the number
    of files plus sub-directories directly inside it.

    Args:
        root_directory_path: Existing directory to scan.

    Returns:
        ``(directory_items, file_items)``. Directory items are in the order
        they were discovered, starting with the root.

    Raises:
        NotADirectoryError: If ``root_directory_path`` is not a directory.
    """
    # Raise explicitly rather than ``assert``: assertions are removed when
    # Python runs with -O, which would let a bad path through unchecked.
    if not os.path.isdir(root_directory_path):
        raise NotADirectoryError(f"Not a directory: {root_directory_path!r}")

    def log_walk_error(error: OSError) -> None:
        # Without an onerror callback os.walk drops unlistable directories
        # silently; keep going, but leave a trace of what was skipped.
        logging.getLogger(__name__).warning("Could not list directory, skipping it: %s", error)

    root_parent, root_name = os.path.split(root_directory_path)
    if not root_name:
        # A trailing separator ("foo/") splits into ("foo", ""); split once
        # more so the root item carries its real name instead of "".
        root_parent, root_name = os.path.split(root_parent)

    # Keyed by the exact strings os.walk yields, so the root is stored under
    # the path as given by the caller.
    directory_items_by_path: dict[str, DirectoryItem] = {
        root_directory_path: DirectoryItem(root_parent, root_name),
    }
    file_items: list[FileItem] = []

    for current_path, directory_names, file_names in os.walk(root_directory_path, onerror=log_walk_error):
        # Internal invariant: every walked directory was registered either as
        # the root or as a child of a directory visited earlier.
        assert current_path in directory_items_by_path
        directory_items_by_path[current_path].num_direct_children = len(file_names) + len(directory_names)

        file_items.extend(FileItem(current_path, file_name) for file_name in file_names)

        for directory_name in directory_names:
            dir_item = DirectoryItem(current_path, directory_name)
            assert dir_item.path not in directory_items_by_path
            directory_items_by_path[dir_item.path] = dir_item

    return (list(directory_items_by_path.values()), file_items)
def save_fs_items(root_directory_path: str, output_file_path: str) -> None:
    """Scan ``root_directory_path`` and write the findings to a JSON file.

    The file at ``output_file_path`` receives an object with two keys:
    "num_items" (the total count) and "items" (one dict per directory and
    file, ordered by ascending path length).
    """
    directories, files = get_fs_items(root_directory_path)

    combined: list[FsItem] = [*directories, *files]
    # list.sort is stable, so entries with equal path length keep their
    # directories-then-files order.
    combined.sort(key=lambda entry: entry.path_length)

    payload = {
        "num_items": len(combined),
        "items": [entry.to_dict() for entry in combined],
    }
    with open(output_file_path, "w", encoding="utf-8") as out:
        json.dump(payload, out, indent=4)
def sort_results(input_file_path: str, output_file_path: str, sort_key: str) -> None:
    """Re-sort a saved scan by one attribute, largest value first.

    Reads the "items" list from the JSON object in ``input_file_path``, sorts
    it in descending order of ``sort_key`` and writes the sorted list (only
    the list, not the surrounding object) to ``output_file_path``.

    Args:
        input_file_path: JSON file containing an object with an "items" list.
        output_file_path: Destination for the sorted JSON list.
        sort_key: Key present in every item dict to sort by.

    Raises:
        KeyError: If any item lacks ``sort_key``; the message lists the keys
            that are available. The output file is not touched in that case.
    """
    with open(input_file_path, "r", encoding="utf-8") as f:
        items = json.load(f)["items"]

    # Validate up front so a mistyped key yields an actionable message rather
    # than a bare KeyError raised from inside the sort.
    if any(sort_key not in item for item in items):
        available = sorted({key for item in items for key in item})
        raise KeyError(
            f"sort key {sort_key!r} is not present in every item; available keys: {available}"
        )

    # reverse=True keeps the original relative order of equal elements.
    items = sorted(items, key=lambda item: item[sort_key], reverse=True)

    with open(output_file_path, "w", encoding="utf-8") as f:
        json.dump(items, f, indent=4)
def main() -> None:
    """Parse the command line and run the chosen subcommand.

    ``run <root_directory_path>`` scans that directory into ./results.json.
    ``sort_results <sort_key>`` reads ./results.json and writes the re-sorted
    items to ./sorted_results.json.
    """
    parser = ArgumentParser()
    commands = parser.add_subparsers(dest="subcommand", required=True, help="Command to run")

    run_command = commands.add_parser("run")
    run_command.add_argument("root_directory_path", help="Path to scan")

    sort_command = commands.add_parser("sort_results")
    sort_command.add_argument("sort_key", help="An attribute of FsItem for reverse sorting the results")

    cli = parser.parse_args()
    chosen = cli.subcommand

    if chosen == "run":
        save_fs_items(cli.root_directory_path, "./results.json")
        return
    if chosen == "sort_results":
        sort_results("./results.json", "./sorted_results.json", cli.sort_key)
# This file is meant to be executed, not imported: when it is loaded under any
# other module name, log an error and terminate with exit status 1.
if __name__ != "__main__":
    logger.error("Run as standalone script only")
    sys.exit(1)

main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment