Created
March 26, 2024 11:08
-
-
Save diramazioni/1e960f0433999efe39769b205e364c50 to your computer and use it in GitHub Desktop.
List files recursively from a JSON configuration passed as an argument.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import fnmatch | |
import json | |
import argparse | |
def list_files_recursive(directory, include_patterns, exclude_dirs):
    """
    List files recursively in the given directory,
    filtering them by include patterns and excluding specified directories.

    Each matching file is reported once, even when its name matches several
    include patterns. Directories are visited in sorted name order; inside
    one directory the results follow the order of ``include_patterns`` and,
    for each pattern, the sorted file names.

    :param directory: The base directory to search in.
    :param include_patterns: List of fnmatch-style patterns of file names to include.
    :param exclude_dirs: List of directory names to exclude from the search.
    :return: A list of file paths that match the include patterns and
        are not within the excluded directories.
    """
    matches = []
    for root, dirnames, filenames in os.walk(directory):
        # Prune excluded directories in place so os.walk never descends into
        # them; sorting makes the traversal order reproducible between runs.
        dirnames[:] = sorted(d for d in dirnames if d not in exclude_dirs)
        filenames.sort()
        # Names already reported for this directory, so a file matching more
        # than one pattern is not appended twice.
        seen = set()
        for pattern in include_patterns:
            for filename in fnmatch.filter(filenames, pattern):
                if filename in seen:
                    continue
                seen.add(filename)
                matches.append(os.path.join(root, filename))
    return matches
def main():
    """
    Command-line entry point: list files described by a JSON configuration.

    The single positional argument is a JSON object with the keys
    ``directory`` (required), ``includeFile``, ``excludeDir``, ``batch`` and
    ``start`` (all optional). The matching file paths are printed to stdout
    as a JSON array.

    ``start`` skips that many leading results; ``batch``, when greater than
    zero, limits the output to that many results after ``start``.

    :raises ValueError: If the argument is not a JSON object, the directory
        is missing or does not exist, or ``batch``/``start`` are not valid
        integers.
    """
    # Set up argument parser
    parser = argparse.ArgumentParser(description='List files recursively based on include and exclude patterns.')
    parser.add_argument('input_json', help='''
{
"directory": "/path/to/search",
"includeFile": ["*.ts"],
"excludeDir": ["node_modules", "dist"],
"batch":10,
"start":0
}
''')
    # Parse arguments
    args = parser.parse_args()

    # Convert the JSON string into a Python dictionary
    try:
        input_params = json.loads(args.input_json)
    except json.JSONDecodeError as err:
        raise ValueError("Invalid JSON provided as input") from err
    if not isinstance(input_params, dict):
        # A JSON array/number/string has no .get(); fail with a clear message.
        raise ValueError("Invalid JSON provided as input: expected an object")

    # Extract parameters
    search_directory = input_params.get('directory')
    include_files = input_params.get('includeFile', [])
    exclude_directories = input_params.get('excludeDir', [])
    batch = input_params.get('batch', 0)
    start = input_params.get('start', 0)

    # A bare string would otherwise be iterated character by character.
    if isinstance(include_files, str):
        include_files = [include_files]
    if isinstance(exclude_directories, str):
        exclude_directories = [exclude_directories]

    # Validate directory (a missing key yields None, which os.path.isdir rejects
    # with TypeError, so check the type first).
    if not isinstance(search_directory, str) or not os.path.isdir(search_directory):
        raise ValueError(f"The specified directory does not exist: {search_directory}")

    # Validate slicing parameters before using them in comparisons and slices.
    if not isinstance(batch, int) or not isinstance(start, int):
        raise ValueError('"batch" and "start" must be integers')
    if start < 0:
        raise ValueError('"start" must not be negative')

    # Get the list of files
    files = list_files_recursive(search_directory, include_files, exclude_directories)

    # Slice if the batch is set; honour "start" on its own as well.
    if batch > 0:
        files = files[start:start + batch]
    elif start > 0:
        files = files[start:]

    # Output the list of files
    print(json.dumps(files))
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment