Created
May 26, 2025 15:23
-
-
Save eliezedeck/44a7a3b0d8bb8102cbbddd2f0bdc24dd to your computer and use it in GitHub Desktop.
file_concatenator.py — useful for combining multiple files into a single AI context, since agents sometimes don't read the files that you explicitly want them to read
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
File Concatenator Script | |
This script takes a list of file paths and concatenates them into a single file | |
with the format: | |
[ File: /path/to/file ] | |
----- | |
file content | |
----- | |
Usage: | |
python file_concatenator.py file1.py file2.txt file3.csv -o output.txt | |
python file_concatenator.py --files-list files.txt -o output.txt | |
""" | |
import argparse | |
import os | |
import sys | |
from typing import List | |
def read_file_content(file_path: str) -> str:
    """
    Read a file as UTF-8 text and return its content.

    Read failures are never raised to the caller; they are reported in-band
    as placeholder strings so the result can always be written to the output.

    Args:
        file_path: Path to the file to read

    Returns:
        The decoded text of the file. If the file is not valid UTF-8, the
        placeholder "<Binary file content - N bytes>" where N is the number
        of bytes in the file. If the file cannot be read at all, the
        placeholder "<Error reading file: ...>" carrying the exception text.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return f.read()
    except UnicodeDecodeError:
        # Not UTF-8 text: only the size is reported. Count the bytes in
        # chunks so a large binary file is never held in memory in full.
        try:
            with open(file_path, 'rb') as f:
                size = sum(len(chunk) for chunk in iter(lambda: f.read(65536), b''))
            return f"<Binary file content - {size} bytes>"
        except Exception as e:
            return f"<Error reading file: {str(e)}>"
    except Exception as e:
        # Deliberately broad: any failure (missing file, permissions,
        # directory, invalid path) becomes a placeholder, not a crash.
        return f"<Error reading file: {str(e)}>"
def get_absolute_path(file_path: str, base_path: str = None) -> str:
    """
    Resolve a path to its absolute form.

    Args:
        file_path: Path to the file
        base_path: Directory that relative paths are resolved against;
            ignored when empty/None or when file_path is already absolute

    Returns:
        Absolute path of the file
    """
    # No usable base, or nothing to re-anchor: resolve the path as given.
    if not base_path or os.path.isabs(file_path):
        return os.path.abspath(file_path)
    anchored = os.path.join(base_path, file_path)
    return os.path.abspath(anchored)
def concatenate_files(file_paths: List[str], output_file: str, base_path: str = None) -> None:
    """
    Concatenate multiple files into a single file with the specified format.

    Each input that exists is written as a section:

        [ File: /absolute/path ]
        -----
        <content>
        -----

    Sections are separated by exactly one blank line. Blank entries are
    ignored; entries that do not exist are skipped with a warning on stderr.
    An entry that resolves to the output file itself is also skipped with a
    warning, because the output is truncated as soon as it is opened.

    Args:
        file_paths: List of file paths to concatenate
        output_file: Path to the output file
        base_path: Base path to resolve relative paths from
    """
    output_abs = os.path.abspath(output_file)
    # Track whether a section has been written so the separator goes
    # *between* written sections only. Keying on the input index would leave
    # a stray blank line when trailing entries are skipped.
    wrote_section = False
    with open(output_file, 'w', encoding='utf-8') as out_f:
        for file_path in file_paths:
            entry = file_path.strip()
            # Skip empty lines
            if not entry:
                continue
            abs_path = get_absolute_path(entry, base_path)
            # Check if file exists
            if not os.path.exists(abs_path):
                print(f"Warning: File not found: {abs_path}", file=sys.stderr)
                continue
            # The output was just truncated; reading it back as an input
            # would only capture partially written data.
            if abs_path == output_abs:
                print(f"Warning: Skipping output file given as input: {abs_path}", file=sys.stderr)
                continue
            # Add separator between files
            if wrote_section:
                out_f.write("\n")
            # Write file header
            out_f.write(f"[ File: {abs_path} ]\n")
            out_f.write("-----\n")
            # Write file content
            content = read_file_content(abs_path)
            out_f.write(content)
            # Ensure content ends with newline
            if content and not content.endswith('\n'):
                out_f.write('\n')
            out_f.write("-----\n")
            wrote_section = True
def read_files_from_list(list_file: str) -> List[str]:
    """
    Load file paths from a text file holding one path per line.

    Surrounding whitespace is stripped from every line, and lines that are
    empty after stripping are dropped.

    Args:
        list_file: Path to the file containing list of file paths

    Returns:
        List of file paths, in the order they appear in the file
    """
    entries: List[str] = []
    with open(list_file, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            cleaned = raw_line.strip()
            if cleaned:
                entries.append(cleaned)
    return entries
def main():
    """
    Command-line entry point.

    Parses the arguments, collects the input paths (positional arguments
    first, then the entries of --files-list when given), and writes the
    concatenated result to --output. Exits with status 1 when no input is
    specified, when the list file cannot be read, or when concatenation
    fails; errors are reported on stderr.
    """
    parser = argparse.ArgumentParser(
        description="Concatenate multiple files into a single file with formatted headers",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            "\nExamples:\n"
            "%(prog)s file1.py file2.txt -o combined.txt\n"
            "%(prog)s --files-list files.txt -o combined.txt\n"
            "%(prog)s file1.py file2.txt --base-path /project/root -o combined.txt\n"
        ),
    )
    # Input options
    parser.add_argument(
        'files',
        nargs='*',
        help='List of files to concatenate'
    )
    parser.add_argument(
        '--files-list', '-l',
        help='Text file containing list of file paths (one per line)'
    )
    # Output options
    parser.add_argument(
        '--output', '-o',
        required=True,
        help='Output file path'
    )
    # Base path option
    parser.add_argument(
        '--base-path', '-b',
        help='Base path to resolve relative file paths from'
    )
    # Verbose option
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Enable verbose output'
    )
    args = parser.parse_args()

    # Get list of files. Positional paths are kept even when a list file is
    # also supplied, so nothing the user asked for is silently dropped.
    file_paths = list(args.files)
    if args.files_list:
        if args.verbose:
            print(f"Reading file list from: {args.files_list}")
        try:
            file_paths.extend(read_files_from_list(args.files_list))
        except (OSError, UnicodeError) as e:
            # Report an unreadable list file the same way as any other
            # failure instead of surfacing a raw traceback.
            print(f"Error: {str(e)}", file=sys.stderr)
            sys.exit(1)

    if not file_paths:
        print("Error: No files specified", file=sys.stderr)
        sys.exit(1)

    if args.verbose:
        print(f"Files to concatenate: {len(file_paths)}")
        for fp in file_paths:
            print(f" - {fp}")
        print(f"Output file: {args.output}")
        if args.base_path:
            print(f"Base path: {args.base_path}")

    try:
        concatenate_files(file_paths, args.output, args.base_path)
        # The count is the number of requested paths, not the number of
        # sections actually written.
        print(
            f"Successfully concatenated {len(file_paths)} files to {args.output}")
    except Exception as e:
        # Top-level boundary: turn any failure into a message and exit code.
        print(f"Error: {str(e)}", file=sys.stderr)
        sys.exit(1)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment