Created
May 10, 2025 20:08
-
-
Save Fusion/6c6863d07191a0699886e4f92d121f24 to your computer and use it in GitHub Desktop.
Quick n Dirty Split to Markdown for AnyBox Copy
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import os | |
import re | |
def sanitize_filename(name):
    """Reduce *name* to ASCII letters, digits, spaces, and hyphens.

    Every other character (punctuation, underscores, dots, non-ASCII
    letters, newlines, ...) is removed, then leading and trailing
    whitespace is trimmed. Interior spaces are kept as-is. The result may
    be an empty string when nothing in *name* is allowed.
    """
    # Negated character class: matches anything outside the allow-list.
    disallowed = re.compile(r'[^a-zA-Z0-9 \-]')
    return disallowed.sub('', name).strip()
def split_file(input_file):
    """Split a text file into one Markdown file per 'THISISASPLIT' chunk.

    The input is read as UTF-8 and split on the literal marker
    ``THISISASPLIT``. For each chunk, blank lines are dropped; the first
    remaining line is passed through ``sanitize_filename`` to build the
    output name (``<name>.md``) and the remaining lines, joined with
    newlines, become the file body. Output files are created relative to
    the current working directory.

    Chunks that are empty, or whose first line sanitizes to an empty
    string, are skipped. When several chunks in the same run resolve to
    the same filename, later ones are written as ``<name> 2.md``,
    ``<name> 3.md``, ... instead of overwriting the earlier chunk.

    Args:
        input_file: Path of the file to split.

    Returns:
        None. Progress and problems are reported with ``print``.
    """
    if not os.path.isfile(input_file):
        print(f"File not found: {input_file}")
        return

    with open(input_file, 'r', encoding='utf-8') as f:
        content = f.read()

    # Names already written during this run, lower-cased so that titles
    # differing only in case are treated as duplicates too (they would
    # map to the same file on a case-insensitive filesystem).
    used_names = set()

    for chunk in content.split('THISISASPLIT'):
        lines = [line for line in chunk.strip().splitlines() if line.strip()]
        if not lines:
            continue

        filename_base = sanitize_filename(lines[0])
        if not filename_base:
            print("Skipping chunk due to invalid filename.")
            continue

        # Pick the first free name: "Title", then "Title 2", "Title 3", ...
        unique_base = filename_base
        suffix = 1
        while unique_base.lower() in used_names:
            suffix += 1
            unique_base = f"{filename_base} {suffix}"
        used_names.add(unique_base.lower())

        new_filename = f"{unique_base}.md"
        body = "\n".join(lines[1:])
        with open(new_filename, 'w', encoding='utf-8') as out_file:
            out_file.write(body)
        print(f"Written: {new_filename}")
if __name__ == '__main__':
    # Script entry point: expects exactly one argument, the file to split.
    if len(sys.argv) != 2:
        # sys.exit() with a string prints it to stderr and exits with
        # status 1, so a bad invocation is not reported as success.
        sys.exit("Usage: python split_chunks.py <inputfile>")
    split_file(sys.argv[1])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment