Created
October 30, 2024 16:49
-
-
Save SonOfLilit/7ad24948d31c96fd99af366f4d7c590b to your computer and use it in GitHub Desktop.
Split text file by filename/path comments (e.g. for writing LLM outputs to disk)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import re
import sys
from typing import Dict, Set, Tuple
def parse_files(content: str) -> Tuple[Dict[str, str], Set[str]]:
    """Split *content* into files keyed by their path-comment headers.

    A header is a line made up solely of a ``//`` or ``#`` comment whose text
    looks like a path ending in an extension (e.g. ``# src/app.py``).  Every
    line after a header, up to the next header, becomes that file's content.

    The header paths are returned exactly as written; they are not checked
    for absolute paths or ``..`` components, so validate them before writing.

    Returns:
        A ``(files, maybe_files)`` tuple.  ``files`` maps each header path to
        the text found under it.  ``maybe_files`` collects the text of
        single-token comment lines inside a file body that did not qualify
        as headers, so the caller can surface them as possible misses.

    Raises:
        AssertionError: if the same path appears in more than one header, or
            if any line precedes the first header.
    """
    # Match both JS-style and Python-style comments for file paths.
    file_pattern = re.compile(r'^(?://|#)\s+([a-zA-Z0-9./_-]+\.[a-zA-Z0-9]+)\s*$')
    # Any one-token comment line; used only to report near-misses.
    weak_file_pattern = re.compile(r'^(?://|#)\s+(\S+)\s*$')

    files: Dict[str, str] = {}
    maybe_files: Set[str] = set()

    def store(path: str, lines: list) -> None:
        # Raised explicitly (not via ``assert``) so the check survives
        # ``python -O`` and also covers the final file, not just earlier ones.
        if path in files:
            raise AssertionError(f"{path} appears more than once")
        files[path] = ''.join(lines)

    current_file = None
    current_content = []
    for line in content.splitlines(keepends=True):
        match = file_pattern.match(line)
        if match:
            # A new header closes the file collected so far, if any.
            if current_file is not None:
                store(current_file, current_content)
            current_file = match.group(1)
            current_content = []
            continue

        if current_file is None:
            # Content before the first header has no file to belong to.
            raise AssertionError(
                f"content found before the first file header: {line!r}"
            )

        current_content.append(line)
        weak_match = weak_file_pattern.match(line)
        if weak_match:
            maybe_files.add(weak_match.group(1))

    # Don't forget to save the last file.
    if current_file is not None:
        store(current_file, current_content)

    return files, maybe_files
def ensure_directory(filepath: str) -> None:
    """Create the parent directory chain of *filepath* when it has one."""
    parent = os.path.dirname(filepath)
    if not parent:
        # Bare filename: no directory component, so nothing to create.
        return
    os.makedirs(parent, exist_ok=True)
def _escapes_cwd(filepath: str) -> bool:
    """Return True if *filepath* would resolve outside the current directory."""
    base = os.path.realpath(os.getcwd())
    # realpath collapses ``..`` and follows symlinks; joining an absolute
    # path discards ``base``, so absolute targets are caught as well.
    target = os.path.realpath(os.path.join(base, filepath))
    return os.path.commonpath([base, target]) != base


def main() -> None:
    """Command-line entry point: split the input file and write the pieces.

    Reads the file named by the single command-line argument, prints a
    preview of the files parsed out of it, asks for confirmation, and then
    writes each file relative to the current directory.

    Exits with status 1 on a usage error, an unreadable input file, a target
    path that would land outside the current directory, or a write error.
    Exits with status 0 when the user declines to write.
    """
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} <input_file>")
        sys.exit(1)

    input_file = sys.argv[1]
    try:
        with open(input_file, 'r', encoding='utf-8') as f:
            text = f.read()
    except FileNotFoundError:
        print(f"Error: Could not find file '{input_file}'")
        sys.exit(1)
    except (OSError, UnicodeDecodeError) as e:
        print(f"Error reading '{input_file}': {e}")
        sys.exit(1)

    files, maybe_files = parse_files(text)

    # Preview the files to be written.
    for filepath, body in files.items():
        print(f"{filepath} ({len(body.splitlines())} lines)")
    if maybe_files:
        print("\nNOT detected as files:")
        # Sorted so the report is stable from run to run.
        for filepath in sorted(maybe_files):
            print(filepath)

    # The header paths come from untrusted text; refuse anything that would
    # be written outside the working directory (absolute paths, ``..``).
    unsafe = [filepath for filepath in files if _escapes_cwd(filepath)]
    if unsafe:
        print("\nError: refusing to write outside the current directory:")
        for filepath in unsafe:
            print(filepath)
        sys.exit(1)

    response = input("\nWrite? [y/N]")
    if response.strip().lower() != 'y':
        print("Aborted.")
        sys.exit(0)

    # Write the files.
    for filepath, body in files.items():
        try:
            ensure_directory(filepath)
            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(body)
        except OSError as e:
            print(f"Error writing {filepath}: {e}")
            sys.exit(1)
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
`file_pattern` will allow writes outside the current directory, e.g. `# /etc/foo.conf`, `# ../../../foo/bar.sh`, or `# foo/../../../bar.sh`…
Interactively prompting is good 👍, but consider also tightening against path traversal.