Skip to content

Instantly share code, notes, and snippets.

@jmquintana79
Last active July 9, 2025 07:43
Show Gist options
  • Save jmquintana79/578d2f9ab3cb16fc0c35e0f7db2cbf81 to your computer and use it in GitHub Desktop.
Save jmquintana79/578d2f9ab3cb16fc0c35e0f7db2cbf81 to your computer and use it in GitHub Desktop.
import os
import pandas as pd
import logging
def list_files_recursive(folder_input:str, file_format:str = '') -> pd.DataFrame:
    """Collect a table of the files found recursively under a folder.

    Arguments:
        folder_input {str} -- Existing folder to be walked with os.walk.

    Keyword Arguments:
        file_format {str} -- File name suffix to keep, compared
            case-insensitively (e.g. ".csv"). An empty string keeps
            every file (default: {''})

    Raises:
        AssertionError: An argument is not a string, or the input folder
            does not exist.
        ValueError: Anything failed while walking the folder or building
            the table; the original error is attached as the cause.

    Returns:
        pd.DataFrame -- One row per collected file with the columns
            "folder" (walked folder with the leading `folder_input`
            prefix removed), "filename" and "path" (walked folder joined
            with the file name). An empty table with the same columns is
            returned, and a warning logged, when nothing was collected.
    """
    # validate arguments: raised explicitly (instead of `assert`) so the checks
    # are not stripped under `python -O`; AssertionError is kept so that
    # existing callers see the same exception type
    if not isinstance(folder_input, str):
        raise AssertionError("folder_input must be a string")
    if not os.path.isdir(folder_input):
        raise AssertionError(f"Input folder does not exist: '{folder_input}'")
    if not isinstance(file_format, str):
        raise AssertionError("file_format must be a string")
    # loop invariants; an empty suffix matches every name with str.endswith
    suffix = file_format.lower()
    prefix_length = len(folder_input)
    # recursive files list
    try:
        # initialization
        folders = []
        filenames = []
        paths = []
        # loop of walking
        for folder_root, _, files_in_folder in os.walk(folder_input):
            # os.walk builds every root by joining onto folder_input, so only
            # that leading prefix is cut off; str.replace would also delete
            # later occurrences of the same text inside the path
            folder = folder_root[prefix_length:]
            # loop of files
            for filename in files_in_folder:
                # validate format
                if not filename.lower().endswith(suffix):
                    continue
                # append folder, file name and path
                folders.append(folder)
                filenames.append(filename)
                paths.append(os.path.join(folder_root, filename))
        # validate the collected information
        if not paths:
            # display
            logging.warning(f"It was not collected any content: num of files = {len(paths)} / num of folders = {len(folders)}")
            # return empty df
            return pd.DataFrame({"folder":[], "filename":[], "path":[]})
        # build and return df
        return pd.DataFrame({"folder": folders, "filename": filenames, "path": paths})
    except Exception as err:
        logging.exception("It was not possible to collect files in the selected folder.")
        # keep the ValueError contract but chain the real cause for debugging
        raise ValueError("stop") from err
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment