Skip to content

Instantly share code, notes, and snippets.

@SonOfLilit
Created October 30, 2024 16:49
Show Gist options
  • Save SonOfLilit/7ad24948d31c96fd99af366f4d7c590b to your computer and use it in GitHub Desktop.
Save SonOfLilit/7ad24948d31c96fd99af366f4d7c590b to your computer and use it in GitHub Desktop.
Split text file by filename/path comments (e.g. for writing LLM outputs to disk)
import os
import re
import sys
from typing import Dict, Set, Tuple
def parse_files(content: str) -> Tuple[Dict[str, str], Set[str]]:
    """Split *content* into files keyed by their path-comment headers.

    A header is a line consisting solely of a ``//`` or ``#`` comment whose
    text looks like a file path with an extension (e.g. ``# src/app.py``).
    Every line after a header, up to the next header, belongs to that file.

    Args:
        content: Full text to split.

    Returns:
        A ``(files, maybe_files)`` pair. ``files`` maps each header path to
        the text that followed it. ``maybe_files`` holds the text of
        single-token comment lines that were kept as file content because
        they did not qualify as headers, so the caller can show them for
        review.

    Raises:
        AssertionError: If a path appears in more than one header, or if any
            line precedes the first header.
    """
    files = {}
    maybe_files = set()
    current_file = None
    current_content = []

    # Match both JS-style and Python-style comments for file paths
    file_pattern = re.compile(r'^(?://|#)\s+([a-zA-Z0-9./_-]+\.[a-zA-Z0-9]+)\s*$')
    weak_file_pattern = re.compile(r'^(?://|#)\s+(\S+)\s*$')

    def save_current() -> None:
        # Raised explicitly (not via `assert`) so the check survives `python -O`.
        if current_file in files:
            raise AssertionError(f"{current_file} appears more than once")
        files[current_file] = ''.join(current_content)

    for line in content.splitlines(keepends=True):
        match = file_pattern.match(line)
        if match:
            # A new header closes the file that was being collected.
            if current_file:
                save_current()
            current_file = match.group(1)
            current_content = []
        elif current_file:
            current_content.append(line)
            # Remember comment lines that resemble a header but were not
            # accepted as one, so the user can spot missed file boundaries.
            weak_match = weak_file_pattern.match(line)
            if weak_match:
                maybe_files.add(weak_match.group(1))
        else:
            raise AssertionError(
                f"content before the first file header: {line!r}"
            )

    # The last file goes through the same duplicate check as the others;
    # otherwise a repeated final header would silently overwrite earlier text.
    if current_file:
        save_current()
    return files, maybe_files
def ensure_directory(filepath: str) -> None:
    """Create the parent directories of *filepath* if they are missing.

    A bare filename with no directory component needs nothing created, so
    the call is a no-op in that case.
    """
    parent = os.path.dirname(filepath)
    if not parent:
        return
    os.makedirs(parent, exist_ok=True)
def _escapes_cwd(filepath: str) -> bool:
    """Return True if *filepath* resolves outside the current directory."""
    root = os.path.realpath(os.getcwd())
    # Joining with an absolute path discards `root`, so absolute targets are
    # judged on their own; realpath also collapses `..` and follows symlinks.
    target = os.path.realpath(os.path.join(root, filepath))
    try:
        return os.path.commonpath([root, target]) != root
    except ValueError:
        # commonpath refuses paths with nothing in common (e.g. different
        # Windows drives); such a target is certainly not under `root`.
        return True


def main() -> None:
    """Split the file named on the command line and write the pieces to disk.

    Reads the input file, previews the files that would be created (with
    line counts) plus any comment lines that resembled paths but were not
    recognised, and writes only after the user answers ``y``.

    Exits with status 1 on a usage error, a missing input file, a target
    path outside the current directory, or a write failure; exits with
    status 0 when the user declines.
    """
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} <input_file>")
        sys.exit(1)

    input_file = sys.argv[1]
    try:
        with open(input_file, 'r') as f:
            content = f.read()
    except FileNotFoundError:
        print(f"Error: Could not find file '{input_file}'")
        sys.exit(1)

    files, maybe_files = parse_files(content)

    # The paths come from the input text, which may be untrusted (e.g. LLM
    # output). Refuse absolute paths and `..` traversal before prompting.
    unsafe = [filepath for filepath in files if _escapes_cwd(filepath)]
    if unsafe:
        print("Error: refusing to write outside the current directory:")
        for filepath in unsafe:
            print(f"  {filepath}")
        sys.exit(1)

    # Preview the files to be written
    for filepath, text in files.items():
        # splitlines() does not count a phantom line after a trailing
        # newline and yields 0 for an empty file.
        line_count = len(text.splitlines())
        print(f"{filepath} ({line_count} lines)")
    if maybe_files:
        print("\nNOT detected as files:")
        # Sorted so the listing is stable between runs.
        for filepath in sorted(maybe_files):
            print(filepath)

    response = input("\nWrite? [y/N]")
    if response.strip().lower() != 'y':
        print("Aborted.")
        sys.exit(0)

    # Write the files
    for filepath, text in files.items():
        try:
            ensure_directory(filepath)
            with open(filepath, 'w') as f:
                f.write(text)
        except IOError as e:
            print(f"Error writing {filepath}: {e}")
            sys.exit(1)
# Run the command-line entry point only when executed as a script, so that
# importing this module has no side effects.
if __name__ == "__main__":
    main()
@cben
Copy link

cben commented Oct 30, 2024

`file_pattern` will allow writes outside the current directory, e.g. `# /etc/foo.conf`, `# ../../../foo/bar.sh`, or `# foo/../../../bar.sh`.
Interactively prompting is good 👍, but consider also tightening against path traversal.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment