Skip to content

Instantly share code, notes, and snippets.

@swayson
Created April 23, 2025 05:12
Show Gist options
  • Save swayson/cad3aa565d63e0977b23d993ec673347 to your computer and use it in GitHub Desktop.
Save swayson/cad3aa565d63e0977b23d993ec673347 to your computer and use it in GitHub Desktop.
"""
# --- Define variables first for clarity (optional, but recommended) ---
INPUT_DIR="path/to/your/project"
OUTPUT_FILE="collated_output.md"
PYTHON_SCRIPT="path/to/process_files.py" # Use absolute or relative path
# --- The actual command ---
(cd "$INPUT_DIR" && rg --files -g '*.py' -g '*.md' -g '*.toml' -g '.gitignore' .) | \
sort | \
python3 "$PYTHON_SCRIPT" "$INPUT_DIR" > "$OUTPUT_FILE"
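# --- Optional: Also save the progress messages to a log file ---
# The Python script writes its "Processing: ..." messages to stderr, so with the
# command above they already show on the terminal while stdout goes to the file.
# To keep them on the terminal AND save a copy (without mixing them into the
# Markdown output), send stderr through tee before redirecting stdout:
(
(cd "$INPUT_DIR" && rg --files -g '*.py' -g '*.md' -g '*.toml' -g '.gitignore' .) | \
sort | \
python3 "$PYTHON_SCRIPT" "$INPUT_DIR" \
) 2>&1 > "$OUTPUT_FILE" | tee progress.log >&2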
# Or simpler if you just want python errors/logs on terminal:
# (cd "$INPUT_DIR" && rg ... | sort | python3 ... ) > "$OUTPUT_FILE"
# (The python script already prints logs to stderr)
echo "Processing complete. Output written to $OUTPUT_FILE"
"""
# process_files.py
import sys
import os
import argparse
# File extensions (lowercase, with leading dot) that have a dedicated Markdown
# language tag. Input files are filtered upstream by rg, so this set is not
# used to decide which files get processed.
# NOTE(review): nothing in the visible code reads this constant;
# get_language_tag() carries its own extension literals. Confirm whether it is
# still needed before relying on it or removing it.
SUPPORTED_EXTENSIONS_FOR_TAGGING: set[str] = {'.md', '.py', '.toml', '.gitignore'}
def get_language_tag(extension: str) -> str:
    """Map a file extension to the language tag used on a Markdown code fence.

    The lookup is case-insensitive. Any extension without a dedicated tag
    (including the empty string) falls back to ``'text'``.

    Args:
        extension: File extension including its leading dot, e.g. ``'.py'``.

    Returns:
        One of ``'python'``, ``'markdown'``, ``'toml'``, ``'gitignore'`` or
        ``'text'``.
    """
    tags_by_extension = {
        '.py': 'python',
        '.md': 'markdown',
        '.toml': 'toml',
        '.gitignore': 'gitignore',
    }
    # Unknown extensions that rg might still pass through are tagged as text.
    return tags_by_extension.get(extension.lower(), 'text')
def _extension_for_tagging(relative_path: str) -> str:
    """Return the extension used to choose a language tag for ``relative_path``."""
    file_name = os.path.basename(relative_path)
    _, extension = os.path.splitext(file_name)
    # os.path.splitext treats a leading dot as part of the root, so a dotfile
    # such as ".gitignore" yields an empty extension; use the whole name then.
    if not extension and file_name.startswith('.'):
        return file_name
    return extension


def _fence_for(content: str) -> str:
    """Return a backtick fence that does not occur anywhere in ``content``."""
    fence = "```"
    # A longer run of backticks contains every shorter run, so growing the
    # fence until it is absent guarantees the content cannot close the block.
    while fence in content:
        fence += "`"
    return fence


def process_files(base_input_dir: str) -> None:
    """Collate the files listed on stdin into Markdown written to stdout.

    Each non-blank stdin line is treated as a path relative to
    ``base_input_dir``. For every file that can be read, a ``# <path>`` heading
    followed by a fenced code block holding the file's content is printed to
    stdout. Progress messages, per-file errors and the final summary are
    printed to stderr so they never mix with the collated output.

    Args:
        base_input_dir: Directory the relative paths are resolved against.

    Returns:
        None. Files that cannot be processed are reported on stderr and skipped.
    """
    processed_files_count = 0
    errors: list[str] = []

    # Process each line (relative file path) from standard input.
    for relative_path_line in sys.stdin:
        relative_path = relative_path_line.strip()
        if not relative_path:  # Skip empty lines, just in case
            continue

        # Forward slashes for display; os.path.join handles os.sep for reading.
        relative_path_md = relative_path.replace(os.sep, '/')
        full_path = os.path.join(base_input_dir, relative_path)
        print(f"Processing: {relative_path_md}", file=sys.stderr)  # Log to stderr

        try:
            # Undecodable bytes are replaced rather than aborting the file.
            with open(full_path, 'r', encoding='utf-8', errors='replace') as f:
                file_content = f.read()

            lang_tag = get_language_tag(_extension_for_tagging(relative_path))
            fence = _fence_for(file_content)

            # --- Print formatted output to standard output ---
            print(f"# {relative_path_md}\n")
            print(f"{fence}{lang_tag}")
            print(file_content, end='')  # content usually ends with its own newline
            if file_content and not file_content.endswith('\n'):
                print()  # keep the closing fence on its own line
            print(f"{fence}\n")
            # --- End of formatted output ---

            processed_files_count += 1
        except FileNotFoundError:
            error_msg = f"Skipped: File not found (perhaps removed after rg scan?): {relative_path_md}"
            errors.append(error_msg)
            print(error_msg, file=sys.stderr)
        except PermissionError as e:
            # Must precede OSError: PermissionError is a subclass of it.
            error_msg = f"Skipped: Permission Error reading {relative_path_md}: {e}"
            errors.append(error_msg)
            print(error_msg, file=sys.stderr)
        except OSError as e:
            error_msg = f"Skipped: IO Error reading {relative_path_md}: {e}"
            errors.append(error_msg)
            print(error_msg, file=sys.stderr)
        except Exception as e:
            # Best effort: any other failure skips this file, not the whole run.
            error_msg = f"Skipped: Unexpected error processing {relative_path_md}: {e}"
            errors.append(error_msg)
            print(error_msg, file=sys.stderr)

    # Final summary information to stderr
    if processed_files_count == 0:
        print("\nWarning: No files were processed.", file=sys.stderr)
    if errors:
        print("\nEncountered errors or warnings during processing:", file=sys.stderr)
        for err in sorted(set(errors)):
            print(f"- {err}", file=sys.stderr)
    print(f"\nPython script finished processing {processed_files_count} files.", file=sys.stderr)
if __name__ == "__main__":
    # Command-line interface: a single positional argument naming the
    # directory that the relative paths on stdin are resolved against.
    cli = argparse.ArgumentParser(
        description=(
            "Reads relative file paths from stdin, reads file contents, "
            "and outputs formatted Markdown to stdout."
        ),
    )
    cli.add_argument(
        "base_input_dir",
        help=(
            "The original base directory from which rg searched. "
            "Needed to reconstruct full paths."
        ),
    )

    # Resolve to an absolute path so later joins do not depend on the cwd.
    base_dir = os.path.abspath(cli.parse_args().base_input_dir)

    if not os.path.isdir(base_dir):
        print(f"Error: Provided base input directory '{base_dir}' does not exist or is not a directory.", file=sys.stderr)
        sys.exit(1)

    try:
        process_files(base_dir)
    except Exception as exc:
        # Exit code 2 marks an unexpected failure; the traceback goes to stderr.
        print(f"\nAn unexpected critical error occurred in the Python script: {exc}", file=sys.stderr)
        import traceback
        traceback.print_exc(file=sys.stderr)
        sys.exit(2)
    else:
        sys.exit(0)  # Explicit success
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment