Skip to content

Instantly share code, notes, and snippets.

@WillPapper
Last active September 23, 2024 19:52
Show Gist options
  • Save WillPapper/969751a6945c65e86e1069f59f81b3cd to your computer and use it in GitHub Desktop.
Save WillPapper/969751a6945c65e86e1069f59f81b3cd to your computer and use it in GitHub Desktop.
Concatenate Text V2 (Additional debug output, better file handling)
import os
import mimetypes
import datetime
# Extensions treated as text when mimetypes cannot classify the file by name.
# Feel free to add more languages here
_TEXT_EXTENSIONS = frozenset({
    '.txt', '.md', '.mdx', '.py', '.js', '.html', '.css', '.json', '.xml',
    '.yml', '.yaml', '.go', '.rs', '.sol', '.sh', '.bat', '.ps1',
})


def is_text_file(file_path: str) -> bool:
    """Return True if *file_path* looks like a text file, judging by its name.

    The file is never opened. Two checks are applied in order:

    1. ``mimetypes.guess_type`` reports a ``text/*`` MIME type for the name.
    2. Otherwise, the lower-cased extension is one of ``_TEXT_EXTENSIONS``.

    Args:
        file_path: Path (or bare file name) to classify.

    Returns:
        True when either check succeeds, False otherwise.
    """
    # First, try using mimetypes
    mime_type, _ = mimetypes.guess_type(file_path)
    if mime_type and mime_type.startswith('text/'):
        return True
    # If mimetypes fails, check the file extension (case-insensitively)
    file_extension = os.path.splitext(file_path)[1].lower()
    return file_extension in _TEXT_EXTENSIONS
# Specify the folder path containing the text files. Can be a relative or absolute path
folder_path = "/Users/your-username/path/goes/here"
# Specify the output file path
output_file = "concatenated_text.txt"


def _append_file(outfile, file_path, folder_path):
    """Append one file's heading and contents to *outfile*.

    The file is read completely before anything is written, so a file that
    cannot be opened or decoded as UTF-8 is skipped without leaving a
    heading with no body in the output.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as infile:
            contents = infile.read()
    except UnicodeDecodeError:
        print(f"Error: Unable to read {file_path} as text. Skipping.")
        return
    except OSError as exc:
        # Permission problems, broken symlinks, etc. should not abort the run.
        print(f"Error: Unable to open {file_path} ({exc}). Skipping.")
        return

    # Write the relative path to the file as a heading
    relative_path = os.path.relpath(file_path, folder_path)
    outfile.write(f"File: {relative_path}\n")
    outfile.write(f"Full path: {file_path}\n")
    outfile.write("-" * 80 + "\n\n")
    # Write the contents of the text file to the output file
    outfile.write(contents)
    # Add a line break between files
    outfile.write("\n\n" + "=" * 80 + "\n\n")


def concatenate_text_files(folder_path, output_file, is_text=None):
    """Concatenate every text file under *folder_path* into *output_file*.

    The output starts with a short header (title, timestamp, source folder)
    followed by one section per file: its path relative to *folder_path*,
    its full path, and its contents. Progress is printed for every file
    visited. Directories and files are visited in sorted order so repeated
    runs over the same tree produce the same layout.

    Args:
        folder_path: Root folder to walk (relative or absolute).
        output_file: Path of the file to create; overwritten if it exists.
        is_text: Optional predicate taking a file path and returning True
            for files to include. Defaults to the module's ``is_text_file``.
    """
    if is_text is None:
        is_text = is_text_file

    if not os.path.isdir(folder_path):
        # os.walk() silently yields nothing for a missing folder; say so.
        print(f"Warning: {folder_path} is not an existing directory. "
              "The output will only contain the header.")

    # Resolved once so the walk can recognise the output file if it lives
    # inside the source folder and avoid reading it back into itself.
    output_real_path = os.path.realpath(output_file)

    # Open the output file in write mode (this will overwrite the file if it exists)
    with open(output_file, "w", encoding="utf-8") as outfile:
        # Write header information
        outfile.write("Concatenated Text File\n")
        outfile.write(f"Generated on: {datetime.datetime.now()}\n")
        outfile.write(f"Source folder: {folder_path}\n\n")
        outfile.write("=" * 80 + "\n\n")

        # Iterate over each directory and subdirectory using os.walk()
        for root, dirs, files in os.walk(folder_path):
            # Sorting in place makes os.walk() descend in a stable order.
            dirs.sort()
            for file_name in sorted(files):
                file_path = os.path.join(root, file_name)
                # Debug output: Print the file path
                print(f"File: {file_path}")
                if os.path.realpath(file_path) == output_real_path:
                    print("This is the output file itself. Skipping.")
                elif is_text(file_path):
                    print("Detected as text file. Processing...")
                    _append_file(outfile, file_path, folder_path)
                else:
                    print("Not detected as text file. Skipping.")
                print("------------------------")

    print("Concatenation complete. Output file:", output_file)


if __name__ == "__main__":
    concatenate_text_files(folder_path, output_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment