Last active
July 9, 2025 07:43
-
-
Save jmquintana79/578d2f9ab3cb16fc0c35e0f7db2cbf81 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import pandas as pd | |
import logging | |
def list_files_recursive(folder_input: str, file_format: str = '') -> pd.DataFrame:
    """Collect a table of the files found under a folder, optionally filtered by format.

    The folder is walked recursively with ``os.walk``. For every file kept,
    the table stores the containing folder relative to ``folder_input``
    (i.e. the walked folder with the leading ``folder_input`` prefix removed,
    so the top folder itself is ``''``), the file name and the joined path.

    Arguments:
        folder_input {str} -- Input folder to be analyzed.

    Keyword Arguments:
        file_format {str} -- File-name ending to keep, compared
            case-insensitively. An empty string keeps every file (default: {''})

    Raises:
        AssertionError: If an argument is not a string or the input folder
            does not exist.
        ValueError: If anything fails while collecting the files ("stop").

    Returns:
        pd.DataFrame -- Output table with columns "folder", "filename" and
            "path". It is empty (and a warning is logged) when no file matches.
    """
    # Validate arguments. AssertionError is raised explicitly (instead of using
    # `assert`) so the checks keep working under `python -O` while callers
    # still see the same exception type as before.
    if not isinstance(folder_input, str):
        raise AssertionError("folder_input must be a string")
    if not os.path.isdir(folder_input):
        raise AssertionError(f"Input folder does not exist: '{folder_input}'")
    if not isinstance(file_format, str):
        raise AssertionError("file_format must be a string")

    try:
        # Loop invariants: lowered suffix and the length of the prefix to drop.
        suffix = file_format.lower()
        prefix_len = len(folder_input)

        folders = []
        filenames = []
        paths = []
        for folder_root, _, files_in_folder in os.walk(folder_input):
            # Every root yielded by os.walk starts with `folder_input`, so
            # slicing removes exactly that leading prefix. A plain
            # str.replace() would also delete later occurrences of the same
            # text inside sub-folder names and corrupt the "folder" column.
            folder = folder_root[prefix_len:]
            for file in files_in_folder:
                # An empty suffix matches every name, so no special case is needed.
                if not file.lower().endswith(suffix):
                    continue
                folders.append(folder)
                filenames.append(file)
                paths.append(os.path.join(folder_root, file))

        if not paths:
            logging.warning(
                "It was not collected any content: num of files = %d / num of folders = %d",
                len(paths),
                len(folders),
            )
            # return empty df with the same columns as the populated one
            return pd.DataFrame({"folder": [], "filename": [], "path": []})

        return pd.DataFrame({"folder": folders, "filename": filenames, "path": paths})
    except Exception as err:
        logging.exception("It was not possible to collect files in the selected folder.")
        # Keep the historical exception type/message, but chain the real cause.
        raise ValueError("stop") from err
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment