Created
April 23, 2025 05:12
-
-
Save swayson/cad3aa565d63e0977b23d993ec673347 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
# --- Define variables first for clarity (optional, but recommended) --- | |
INPUT_DIR="path/to/your/project" | |
OUTPUT_FILE="collated_output.md" | |
PYTHON_SCRIPT="path/to/process_files.py" # Use absolute or relative path | |
# --- The actual command --- | |
(cd "$INPUT_DIR" && rg --files -g '*.py' -g '*.md' -g '*.toml' -g '.gitignore' .) | \ | |
sort | \ | |
python3 "$PYTHON_SCRIPT" "$INPUT_DIR" > "$OUTPUT_FILE" | |
# --- Optional: Print progress messages from Python to the terminal --- | |
# If you want to see the "Processing: ..." messages from the Python script | |
# while still redirecting the main output: | |
( | |
(cd "$INPUT_DIR" && rg --files -g '*.py' -g '*.md' -g '*.toml' -g '.gitignore' .) | \ | |
sort | \ | |
python3 "$PYTHON_SCRIPT" "$INPUT_DIR" \ | |
) > "$OUTPUT_FILE" 2>&1 | tee /dev/stderr | |
# Or simpler if you just want python errors/logs on terminal: | |
# (cd "$INPUT_DIR" && rg ... | sort | python3 ... ) > "$OUTPUT_FILE" | |
# (The python script already prints logs to stderr) | |
echo "Processing complete. Output written to $OUTPUT_FILE" | |
""" | |
# process_files.py | |
import argparse
import os
import sys
import traceback
# File extensions (compared case-insensitively, leading dot included) that
# have a dedicated Markdown language tag. rg already decides which files reach
# this script, so the set is not needed for filtering.
# NOTE(review): nothing in the visible code reads this constant; the actual
# extension-to-tag mapping lives in get_language_tag. Keep the two in sync.
SUPPORTED_EXTENSIONS_FOR_TAGGING: set[str] = {'.md', '.py', '.toml', '.gitignore'}
# Lowercased extension -> Markdown code-fence language tag.
_LANGUAGE_TAGS_BY_EXTENSION = {
    '.py': 'python',
    '.md': 'markdown',
    '.toml': 'toml',
    '.gitignore': 'gitignore',
}


def get_language_tag(extension: str) -> str:
    """Return the Markdown language tag for a file extension.

    Args:
        extension: The extension including its leading dot (for example
            ``'.py'``). The comparison is case-insensitive.

    Returns:
        ``'python'``, ``'markdown'``, ``'toml'`` or ``'gitignore'`` for the
        known extensions, and ``'text'`` for anything else (including the
        empty string).
    """
    return _LANGUAGE_TAGS_BY_EXTENSION.get(extension.lower(), 'text')
def _choose_fence(content: str) -> str:
    """Return a backtick fence (at least three long) that is absent from *content*."""
    fence = '```'
    # Lengthen the fence until the content cannot close it prematurely,
    # e.g. a Markdown file that itself contains ``` code blocks.
    while fence in content:
        fence += '`'
    return fence


def process_files(base_input_dir: str) -> None:
    """Collate the files listed on stdin into Markdown written to stdout.

    Each non-blank line read from ``sys.stdin`` is stripped and treated as a
    path relative to ``base_input_dir``. For every file that can be read, a
    ``# <path>`` heading followed by a fenced code block with the file's
    content is printed to stdout. Progress messages, per-file errors and the
    final summary are printed to stderr.

    Files are decoded as UTF-8 with undecodable bytes replaced. A file that
    cannot be processed is skipped and reported; it does not abort the run.

    Args:
        base_input_dir: Directory that the relative paths on stdin are
            joined to in order to locate the files.
    """
    processed_files_count = 0
    errors = []

    def skip(message: str) -> None:
        # Remember the problem for the final summary and report it right away.
        errors.append(message)
        print(message, file=sys.stderr)

    for relative_path_line in sys.stdin:
        relative_path = relative_path_line.strip()
        if not relative_path:  # Skip empty lines, just in case
            continue

        # Forward slashes for display; os.path.join handles os.sep for reading.
        relative_path_md = relative_path.replace(os.sep, '/')
        full_path = os.path.join(base_input_dir, relative_path)
        print(f"Processing: {relative_path_md}", file=sys.stderr)

        try:
            # os.path.splitext treats a leading dot as part of the name, so
            # '.gitignore' yields an empty extension; fall back to the file
            # name itself so dotfiles can still be matched for tagging.
            extension = (os.path.splitext(relative_path)[1]
                         or os.path.basename(relative_path))

            with open(full_path, 'r', encoding='utf-8', errors='replace') as f:
                file_content = f.read()

            lang_tag = get_language_tag(extension)
            fence = _choose_fence(file_content)

            # --- Print formatted output to standard output ---
            print(f"# {relative_path_md}\n")
            print(f"{fence}{lang_tag}")
            print(file_content, end='')  # content usually ends with its own newline
            if file_content and not file_content.endswith('\n'):
                print()  # keep the closing fence on its own line
            print(f"{fence}\n")
            # --- End of formatted output ---

            processed_files_count += 1
        except FileNotFoundError:
            skip(f"Skipped: File not found (perhaps removed after rg scan?): {relative_path_md}")
        except PermissionError as e:
            # Must precede OSError: PermissionError is one of its subclasses.
            skip(f"Skipped: Permission Error reading {relative_path_md}: {e}")
        except OSError as e:
            skip(f"Skipped: IO Error reading {relative_path_md}: {e}")
        except Exception as e:
            # Best effort: any other per-file failure is reported, not fatal.
            skip(f"Skipped: Unexpected error processing {relative_path_md}: {e}")

    # Final summary information to stderr
    if processed_files_count == 0:
        print("\nWarning: No files were processed.", file=sys.stderr)
    if errors:
        print("\nEncountered errors or warnings during processing:", file=sys.stderr)
        for err in sorted(set(errors)):
            print(f"- {err}", file=sys.stderr)
    print(f"\nPython script finished processing {processed_files_count} files.", file=sys.stderr)
def main(argv=None) -> int:
    """Parse the command line, run process_files and return the exit code.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``; ``None``
            (the default) lets argparse read the real command line.

    Returns:
        0 on success, 1 if the base input directory does not exist or is not
        a directory, 2 if process_files raises an unexpected exception.
    """
    parser = argparse.ArgumentParser(
        description="Reads relative file paths from stdin, reads file contents, "
                    "and outputs formatted Markdown to stdout."
    )
    parser.add_argument(
        "base_input_dir",
        help="The original base directory from which rg searched. "
             "Needed to reconstruct full paths."
    )
    args = parser.parse_args(argv)

    # Make sure the base directory is absolute for robustness
    abs_base_input_dir = os.path.abspath(args.base_input_dir)
    if not os.path.isdir(abs_base_input_dir):
        print(f"Error: Provided base input directory '{abs_base_input_dir}' does not exist or is not a directory.", file=sys.stderr)
        return 1

    try:
        process_files(abs_base_input_dir)
    except Exception as e:
        # Top-level boundary: report the failure with a traceback and signal
        # it through a distinct exit code.
        print(f"\nAn unexpected critical error occurred in the Python script: {e}", file=sys.stderr)
        traceback.print_exc(file=sys.stderr)
        return 2
    return 0  # Explicit success


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment