Created
March 18, 2023 13:36
-
-
Save a-berg/570c08f0402899e140ffecbf786f1c25 to your computer and use it in GitHub Desktop.
directory crawler & dirtree formatter, generated by GPT-4
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
from pathlib import Path | |
def crawl_directory(directory, level=None, alias=None):
    """Build a nested dict mirroring the files and folders under *directory*.

    Args:
        directory: Root directory to crawl, as a ``pathlib.Path`` or a string.
        level: Maximum depth to descend, where the root's direct children are
            level 1. ``None`` means no limit.
        alias: Key used for the root entry. Defaults to the directory's name.

    Returns:
        ``{alias: tree}`` where every file maps to ``None`` and every
        directory maps to a nested dict of its contents. A directory that
        lies beyond ``level`` is not opened and maps to ``None`` as well.
    """
    directory = Path(directory)

    def _crawl_directory(dir_path, curr_level):
        if level is not None and curr_level > level:
            return None
        nested_dict = {}
        # iterdir() yields entries in arbitrary order; sort by name so the
        # result (and anything printed from it) is reproducible.
        for item in sorted(dir_path.iterdir(), key=lambda entry: entry.name):
            if item.is_file():
                nested_dict[item.name] = None
            elif item.is_dir():
                nested_dict[item.name] = _crawl_directory(item, curr_level + 1)
            # Entries that are neither (e.g. broken symlinks) are skipped.
        return nested_dict

    if alias is None:
        # Path(".").name is "", so fall back to the resolved name, and to the
        # path text itself for a filesystem root, whose name is also empty.
        alias = directory.name or directory.resolve().name or str(directory)
    return {alias: _crawl_directory(directory, 1)}
def to_dirtree_format(nested_dict):
    r"""Render a crawled tree as a LaTeX ``\dirtree`` block.

    Args:
        nested_dict: Mapping of entry names to ``None`` (rendered as a leaf)
            or to a nested mapping of the same shape (rendered as a folder).

    Returns:
        A string of the form ``\dirtree{%`` + one ``.<depth> <name>.`` line
        per entry + ``}``, with the top-level keys at depth 1.
    """
    # Escape every LaTeX special character, not only "_", so names such as
    # "50%_a&b.txt" do not break the document. translate() works in a single
    # pass, so the backslashes it inserts are never escaped a second time.
    latex_escapes = str.maketrans(
        {
            "\\": r"\textbackslash{}",
            "{": r"\{",
            "}": r"\}",
            "#": r"\#",
            "$": r"\$",
            "%": r"\%",
            "&": r"\&",
            "_": r"\_",
            "~": r"\textasciitilde{}",
            "^": r"\textasciicircum{}",
        }
    )

    def _to_dirtree_format(subtree, depth=1):
        lines = []
        for key, value in subtree.items():
            lines.append(f".{depth} {str(key).translate(latex_escapes)}.\n")
            if value is not None:  # A nested mapping: render its children.
                lines.append(_to_dirtree_format(value, depth + 1))
        return "".join(lines)

    return "\\dirtree{%\n" + _to_dirtree_format(nested_dict) + "}"
def main():
    """Command-line entry point: crawl a directory and print its structure.

    Parses the arguments, rejects a path that is not an existing directory
    with a regular argparse usage error (exit status 2), and prints either
    the nested dict or, with ``--to-dirtree``, its LaTeX dirtree rendering.
    """
    parser = argparse.ArgumentParser(
        description="Recursively crawl a directory and create a nested dictionary representing the file and folder structure"
    )
    parser.add_argument("directory", type=Path, help="The directory to crawl")
    parser.add_argument(
        "--level", "-L", type=int, default=None, help="The maximum depth of recursion"
    )
    parser.add_argument(
        "--alias", "-a", type=str, default=None, help="An alias for the input directory"
    )
    parser.add_argument(
        "--to-dirtree",
        "-D",
        action="store_true",
        default=False,
        # The default output is the printed dict, not JSON, so say so.
        help="formats the output as LaTeX dirtree instead of a Python dict.",
    )
    args = parser.parse_args()

    # Fail with a usage message instead of a traceback from iterdir().
    if not args.directory.is_dir():
        parser.error(f"not a directory: {args.directory}")

    result = crawl_directory(args.directory, args.level, args.alias)
    if args.to_dirtree:
        result = to_dirtree_format(result)
    print(result)


# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
You have to write a Python script that:
1. Recursively crawls a directory, creating a nested dictionary representing the file and folder structure, with the first key being said directory.
2. The recursion could have a limit, like the standard `tree` bash command.
3. Using a recursive function is not a must; you could use a `while` loop if it helps performance and readability, or be creative with `dict` merging and splitting paths.
4. Accepts a directory as an input.
5. Accepts an optional level argument to limit the recursion depth.
6. Accepts an optional alias for the input directory.
7. Uses pathlib for path-related functions and argparse for argument parsing.
Example: for a folder named "notes/" with the following structure:
notes/
├── bibliography.bibtex
├── clean_arch.md
├── dvc_cml.md
└── includes
    ├── tree_l1.tex
    └── tree_l2.tex
the expected output is:
{
    "notes": {
        "bibliography.bibtex": None,
        "clean_arch.md": None,
        "dvc_cml.md": None,
        "includes": {
            "tree_l1.tex": None,
            "tree_l2.tex": None,
        }
    }
}
Review your work for potential bugs before submitting. Write tests in a separate script to check it too.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import unittest | |
from pathlib import Path | |
from directory_crawler import crawl_directory | |
class TestCrawlDirectory(unittest.TestCase):
    """Check crawl_directory against a small throwaway directory tree."""

    def setUp(self):
        """Create mydir/{fileA,fileB,fileC,folder1/{file1,file2}} in a temp dir."""
        # Imported here so the fixture does not depend on the module's
        # top-level import list.
        import tempfile

        scratch = tempfile.TemporaryDirectory()
        # Remove the tree after each test, even when the test fails.
        self.addCleanup(scratch.cleanup)

        self.test_dir = Path(scratch.name) / "mydir"
        folder = self.test_dir / "folder1"
        folder.mkdir(parents=True)
        for name in ("fileA", "fileB", "fileC"):
            (self.test_dir / name).touch()
        for name in ("file1", "file2"):
            (folder / name).touch()

    def test_no_limit(self):
        """Without a level limit the whole tree is returned."""
        expected_output = {
            "mydir": {
                "fileA": None,
                "fileB": None,
                "fileC": None,
                "folder1": {
                    "file1": None,
                    "file2": None,
                },
            }
        }
        self.assertEqual(crawl_directory(self.test_dir), expected_output)

    def test_limit_1(self):
        """With level=1 sub-directories are listed but not opened."""
        expected_output = {
            "mydir": {
                "fileA": None,
                "fileB": None,
                "fileC": None,
                "folder1": None,
            }
        }
        self.assertEqual(crawl_directory(self.test_dir, level=1), expected_output)

    def test_alias(self):
        """The alias replaces the directory name as the root key."""
        expected_output = {
            "my_dir": {
                "fileA": None,
                "fileB": None,
                "fileC": None,
                "folder1": {
                    "file1": None,
                    "file2": None,
                },
            }
        }
        self.assertEqual(crawl_directory(self.test_dir, alias="my_dir"), expected_output)


# Run the test suite only when this file is executed directly.
if __name__ == "__main__":
    unittest.main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment