swayson · April 23, 2025 05:12
diff --git a/process_files.py b/process_files.py
 """

 # --- Define variables first for clarity (optional, but recommended) ---
 INPUT_DIR="path/to/your/project"
 OUTPUT_FILE="collated_output.md"
 PYTHON_SCRIPT="path/to/process_files.py" # Use absolute or relative path

 # --- The actual command ---
 (cd "$INPUT_DIR" && rg --files -g '*.py' -g '*.md' -g '*.toml' -g '.gitignore' .) | \
  sort | \
  python3 "$PYTHON_SCRIPT" "$INPUT_DIR" > "$OUTPUT_FILE"

 # --- Optional: Also keep the progress messages in a log file ---
 # The Python script writes its "Processing: ..." messages to stderr, so with the
 # command above they already appear on the terminal while stdout goes to the
 # output file. To save a copy of them as well (bash/zsh process substitution):
 (cd "$INPUT_DIR" && rg --files -g '*.py' -g '*.md' -g '*.toml' -g '.gitignore' .) | \
   sort | \
   python3 "$PYTHON_SCRIPT" "$INPUT_DIR" > "$OUTPUT_FILE" 2> >(tee process.log >&2)
 # Do NOT add `2>&1` after the `> "$OUTPUT_FILE"` redirect: that would write the
 # log messages into the Markdown output instead of showing them on the terminal.

 echo "Processing complete. Output written to $OUTPUT_FILE"


 """

 # process_files.py
 import sys
 import os
 import argparse

 # File-name suffixes (compared case-insensitively) that have a dedicated
 # Markdown language tag. Choosing which files get processed is done upstream
 # by rg, not by this set.
 # NOTE(review): nothing in the visible code reads this set; get_language_tag()
 # spells out the same four suffixes itself. Keep the two in sync, or confirm
 # the set is still needed.
 SUPPORTED_EXTENSIONS_FOR_TAGGING: set[str] = {'.md', '.py', '.toml', '.gitignore'}

 def get_language_tag(extension: str) -> str:
    """
    Map a file extension to the info string used on its Markdown code fence.

    Matching ignores case. Any extension without a dedicated tag (for
    example something unexpected that rg let through) falls back to 'text'.
    """
    tags_by_extension = {
        '.py': 'python',
        '.md': 'markdown',
        '.toml': 'toml',
        '.gitignore': 'gitignore',
    }
    return tags_by_extension.get(extension.lower(), 'text')

 def _tagging_extension(relative_path: str) -> str:
    """
    Return the suffix that get_language_tag() should be asked about.

    os.path.splitext() ignores a leading dot in the base name, so a dotfile
    such as ".gitignore" has an empty extension. For those the whole base
    name is returned instead so it can still be matched.
    """
    _, extension = os.path.splitext(relative_path)
    if not extension:
        base_name = os.path.basename(relative_path)
        if base_name.startswith('.'):
            return base_name
    return extension


 def _fence_for(content: str) -> str:
    """
    Return a backtick fence that cannot be closed early by ``content``.

    Starts from the usual three backticks and grows until the fence no
    longer occurs inside the content (e.g. Markdown files that contain
    their own fenced blocks).
    """
    fence = '```'
    while fence in content:
        fence += '`'
    return fence


 def process_files(base_input_dir: str) -> None:
    """
    Collate the files listed on stdin into one Markdown document on stdout.

    Every non-blank line of stdin is taken as a file path relative to
    ``base_input_dir``. Each file is read as UTF-8 (undecodable bytes are
    replaced) and written to stdout as a "# <path>" heading followed by a
    fenced code block tagged via get_language_tag(). Progress messages,
    skipped-file messages and the final summary go to stderr.

    Args:
        base_input_dir: Directory the relative paths are joined onto.

    Files that cannot be read are skipped and reported on stderr; no
    exception is raised for them.
    """
    processed_files_count = 0
    errors = []

    def record_skip(message: str) -> None:
        # Remember the message for the final summary and report it right away.
        errors.append(message)
        print(message, file=sys.stderr)

    for relative_path_line in sys.stdin:
        relative_path = relative_path_line.strip()
        if not relative_path:  # Skip empty lines, just in case
            continue

        # Forward slashes for display only; os.path.join copes with os.sep.
        relative_path_md = relative_path.replace(os.sep, '/')
        full_path = os.path.join(base_input_dir, relative_path)

        print(f"Processing: {relative_path_md}", file=sys.stderr)

        try:
            with open(full_path, 'r', encoding='utf-8', errors='replace') as f:
                file_content = f.read()

            lang_tag = get_language_tag(_tagging_extension(relative_path))
            fence = _fence_for(file_content)

            print(f"# {relative_path_md}\n")
            print(f"{fence}{lang_tag}")
            print(file_content, end='')
            # The closing fence has to start on a line of its own.
            if file_content and not file_content.endswith('\n'):
                print()
            print(f"{fence}\n")

            processed_files_count += 1

        except FileNotFoundError:
            record_skip(f"Skipped: File not found (perhaps removed after rg scan?): {relative_path_md}")
        except PermissionError as e:
            # Must come before OSError: PermissionError is a subclass of it.
            record_skip(f"Skipped: Permission Error reading {relative_path_md}: {e}")
        except OSError as e:
            record_skip(f"Skipped: IO Error reading {relative_path_md}: {e}")
        except Exception as e:
            # Best effort: one bad file must not abort the whole run.
            record_skip(f"Skipped: Unexpected error processing {relative_path_md}: {e}")

    # Final summary information to stderr
    if processed_files_count == 0:
        print("\nWarning: No files were processed.", file=sys.stderr)

    if errors:
        print("\nEncountered errors or warnings during processing:", file=sys.stderr)
        for err in sorted(set(errors)):
            print(f"- {err}", file=sys.stderr)

    print(f"\nPython script finished processing {processed_files_count} files.", file=sys.stderr)


 if __name__ == "__main__":
    # Command-line entry point: a single positional argument naming the
    # directory that the stdin paths are relative to.
    cli = argparse.ArgumentParser(
        description=(
            "Reads relative file paths from stdin, reads file contents, "
            "and outputs formatted Markdown to stdout."
        ),
    )
    cli.add_argument(
        "base_input_dir",
        help=(
            "The original base directory from which rg searched. "
            "Needed to reconstruct full paths."
        ),
    )

    # Work with an absolute path from here on.
    abs_base_input_dir = os.path.abspath(cli.parse_args().base_input_dir)

    # Exit code 1: the base directory is missing or not a directory.
    if not os.path.isdir(abs_base_input_dir):
        print(f"Error: Provided base input directory '{abs_base_input_dir}' does not exist or is not a directory.", file=sys.stderr)
        sys.exit(1)

    try:
        process_files(abs_base_input_dir)
    except Exception as e:
        # Exit code 2: anything process_files() did not handle itself.
        print(f"\nAn unexpected critical error occurred in the Python script: {e}", file=sys.stderr)
        import traceback
        traceback.print_exc(file=sys.stderr)
        sys.exit(2)
    else:
        sys.exit(0)  # Explicit success
	"""

	# --- Define variables first for clarity (optional, but recommended) ---
	INPUT_DIR="path/to/your/project"
	OUTPUT_FILE="collated_output.md"
	PYTHON_SCRIPT="path/to/process_files.py" # Use absolute or relative path

	# --- The actual command ---
	(cd "$INPUT_DIR" && rg --files -g '*.py' -g '*.md' -g '*.toml' -g '.gitignore' .) | \
	  sort | \
	  python3 "$PYTHON_SCRIPT" "$INPUT_DIR" > "$OUTPUT_FILE"

	# --- Optional: Also keep the progress messages in a log file ---
	# The Python script writes its "Processing: ..." messages to stderr, so with the
	# basic command they already appear on the terminal while stdout goes to the
	# output file. To save a copy of them as well (bash/zsh process substitution):
	(cd "$INPUT_DIR" && rg --files -g '*.py' -g '*.md' -g '*.toml' -g '.gitignore' .) | \
	  sort | \
	  python3 "$PYTHON_SCRIPT" "$INPUT_DIR" > "$OUTPUT_FILE" 2> >(tee process.log >&2)
	# Do NOT add `2>&1` after the `> "$OUTPUT_FILE"` redirect: that would write the
	# log messages into the Markdown output instead of showing them on the terminal.

	echo "Processing complete. Output written to $OUTPUT_FILE"


	"""

	# process_files.py
	import sys
	import os
	import argparse

	# File-name suffixes (compared case-insensitively) that have a dedicated
	# Markdown language tag. Choosing which files get processed is done upstream
	# by rg, not by this set.
	# NOTE(review): nothing in the visible code reads this set; get_language_tag()
	# spells out the same four suffixes itself. Keep the two in sync, or confirm
	# the set is still needed.
	SUPPORTED_EXTENSIONS_FOR_TAGGING: set[str] = {'.md', '.py', '.toml', '.gitignore'}

	def get_language_tag(extension: str) -> str:
	    """
	    Map a file extension to the info string used on its Markdown code fence.

	    Matching ignores case. Any extension without a dedicated tag (for
	    example something unexpected that rg let through) falls back to 'text'.
	    """
	    tags_by_extension = {
	        '.py': 'python',
	        '.md': 'markdown',
	        '.toml': 'toml',
	        '.gitignore': 'gitignore',
	    }
	    return tags_by_extension.get(extension.lower(), 'text')

	def _tagging_extension(relative_path: str) -> str:
	    """
	    Return the suffix that get_language_tag() should be asked about.

	    os.path.splitext() ignores a leading dot in the base name, so a dotfile
	    such as ".gitignore" has an empty extension. For those the whole base
	    name is returned instead so it can still be matched.
	    """
	    _, extension = os.path.splitext(relative_path)
	    if not extension:
	        base_name = os.path.basename(relative_path)
	        if base_name.startswith('.'):
	            return base_name
	    return extension


	def _fence_for(content: str) -> str:
	    """
	    Return a backtick fence that cannot be closed early by ``content``.

	    Starts from the usual three backticks and grows until the fence no
	    longer occurs inside the content (e.g. Markdown files that contain
	    their own fenced blocks).
	    """
	    fence = '```'
	    while fence in content:
	        fence += '`'
	    return fence


	def process_files(base_input_dir: str) -> None:
	    """
	    Collate the files listed on stdin into one Markdown document on stdout.

	    Every non-blank line of stdin is taken as a file path relative to
	    ``base_input_dir``. Each file is read as UTF-8 (undecodable bytes are
	    replaced) and written to stdout as a "# <path>" heading followed by a
	    fenced code block tagged via get_language_tag(). Progress messages,
	    skipped-file messages and the final summary go to stderr.

	    Args:
	        base_input_dir: Directory the relative paths are joined onto.

	    Files that cannot be read are skipped and reported on stderr; no
	    exception is raised for them.
	    """
	    processed_files_count = 0
	    errors = []

	    def record_skip(message: str) -> None:
	        # Remember the message for the final summary and report it right away.
	        errors.append(message)
	        print(message, file=sys.stderr)

	    for relative_path_line in sys.stdin:
	        relative_path = relative_path_line.strip()
	        if not relative_path:  # Skip empty lines, just in case
	            continue

	        # Forward slashes for display only; os.path.join copes with os.sep.
	        relative_path_md = relative_path.replace(os.sep, '/')
	        full_path = os.path.join(base_input_dir, relative_path)

	        print(f"Processing: {relative_path_md}", file=sys.stderr)

	        try:
	            with open(full_path, 'r', encoding='utf-8', errors='replace') as f:
	                file_content = f.read()

	            lang_tag = get_language_tag(_tagging_extension(relative_path))
	            fence = _fence_for(file_content)

	            print(f"# {relative_path_md}\n")
	            print(f"{fence}{lang_tag}")
	            print(file_content, end='')
	            # The closing fence has to start on a line of its own.
	            if file_content and not file_content.endswith('\n'):
	                print()
	            print(f"{fence}\n")

	            processed_files_count += 1

	        except FileNotFoundError:
	            record_skip(f"Skipped: File not found (perhaps removed after rg scan?): {relative_path_md}")
	        except PermissionError as e:
	            # Must come before OSError: PermissionError is a subclass of it.
	            record_skip(f"Skipped: Permission Error reading {relative_path_md}: {e}")
	        except OSError as e:
	            record_skip(f"Skipped: IO Error reading {relative_path_md}: {e}")
	        except Exception as e:
	            # Best effort: one bad file must not abort the whole run.
	            record_skip(f"Skipped: Unexpected error processing {relative_path_md}: {e}")

	    # Final summary information to stderr
	    if processed_files_count == 0:
	        print("\nWarning: No files were processed.", file=sys.stderr)

	    if errors:
	        print("\nEncountered errors or warnings during processing:", file=sys.stderr)
	        for err in sorted(set(errors)):
	            print(f"- {err}", file=sys.stderr)

	    print(f"\nPython script finished processing {processed_files_count} files.", file=sys.stderr)


	if __name__ == "__main__":
	    # Command-line entry point: a single positional argument naming the
	    # directory that the stdin paths are relative to.
	    parser = argparse.ArgumentParser(
	        description="Reads relative file paths from stdin, reads file contents, "
	                    "and outputs formatted Markdown to stdout."
	    )
	    parser.add_argument(
	        "base_input_dir",
	        help="The original base directory from which rg searched. "
	             "Needed to reconstruct full paths."
	    )
	    args = parser.parse_args()

	    # Make sure the base directory is absolute for robustness
	    abs_base_input_dir = os.path.abspath(args.base_input_dir)

	    # Exit code 1: the base directory is missing or not a directory.
	    if not os.path.isdir(abs_base_input_dir):
	        print(f"Error: Provided base input directory '{abs_base_input_dir}' does not exist or is not a directory.", file=sys.stderr)
	        sys.exit(1)

	    try:
	        process_files(abs_base_input_dir)
	        sys.exit(0)  # Explicit success (SystemExit is not caught below)
	    except Exception as e:
	        # Exit code 2: anything process_files() did not handle itself.
	        print(f"\nAn unexpected critical error occurred in the Python script: {e}", file=sys.stderr)
	        import traceback
	        traceback.print_exc(file=sys.stderr)
	        sys.exit(2)