Last active
September 23, 2024 19:52
-
-
Save WillPapper/969751a6945c65e86e1069f59f81b3cd to your computer and use it in GitHub Desktop.
Concatenate Text V2 (Additional debug output, better file handling)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime
import mimetypes
import os
# Extensions treated as text when the MIME lookup is inconclusive.
# Feel free to add more languages here.
TEXT_EXTENSIONS = frozenset({
    '.txt', '.md', '.mdx', '.py', '.js', '.html', '.css', '.json', '.xml',
    '.yml', '.yaml', '.go', '.rs', '.sol', '.sh', '.bat', '.ps1',
})


def is_text_file(file_path):
    """Return True if *file_path* looks like a text file, judged by name only.

    The file is never opened. Two checks are applied in order:

    1. ``mimetypes.guess_type`` reports a MIME type starting with ``text/``.
    2. Otherwise, the lower-cased file extension is listed in
       ``TEXT_EXTENSIONS``.

    Args:
        file_path: Path (or bare file name) to classify.

    Returns:
        True when either check matches, False otherwise. Names without an
        extension that mimetypes does not recognise are therefore False.
    """
    # First, try using mimetypes
    mime_type, _ = mimetypes.guess_type(file_path)
    if mime_type and mime_type.startswith('text/'):
        return True

    # mimetypes was inconclusive (unknown type, or a non-text/ type such as
    # application/json), so fall back to the known-extension list.
    file_extension = os.path.splitext(file_path)[1].lower()
    return file_extension in TEXT_EXTENSIONS
# Specify the folder path containing the text files. Can be a relative or absolute path
folder_path = "/Users/your-username/path/goes/here"

# Specify the output file path
output_file = "concatenated_text.txt"

# Resolve the output location once so the walk below can recognise it.
# Without this check, an output file that lives inside folder_path would be
# picked up as a ".txt" input and copied into itself while still being written.
output_real_path = os.path.realpath(output_file)

# Open the output file in write mode (this will overwrite the file if it exists)
with open(output_file, "w", encoding="utf-8") as outfile:
    # Write header information
    outfile.write("Concatenated Text File\n")
    outfile.write(f"Generated on: {datetime.datetime.now()}\n")
    outfile.write(f"Source folder: {folder_path}\n\n")
    outfile.write("=" * 80 + "\n\n")

    # Iterate over each directory and subdirectory using os.walk()
    for root, dirs, files in os.walk(folder_path):
        # Sorting dirs in place controls the order os.walk descends in, and
        # sorting files fixes the order within a directory, so repeated runs
        # over the same tree produce the same output.
        dirs.sort()

        for file_name in sorted(files):
            file_path = os.path.join(root, file_name)

            # Debug output: Print the file path
            print(f"File: {file_path}")

            if os.path.realpath(file_path) == output_real_path:
                print("This is the output file itself. Skipping.")
            elif is_text_file(file_path):
                print("Detected as text file. Processing...")

                # Read the whole file BEFORE writing anything, so a file that
                # fails to decode or open leaves no orphaned header behind.
                try:
                    with open(file_path, "r", encoding="utf-8") as infile:
                        contents = infile.read()
                except UnicodeDecodeError:
                    print(f"Error: Unable to read {file_path} as text. Skipping.")
                except OSError as error:
                    # Permission problems, broken symlinks, files removed
                    # mid-walk: skip this file instead of aborting the run.
                    print(f"Error: Unable to open {file_path} ({error}). Skipping.")
                else:
                    # Write the relative path to the file as a heading
                    relative_path = os.path.relpath(file_path, folder_path)
                    outfile.write(f"File: {relative_path}\n")
                    outfile.write(f"Full path: {file_path}\n")
                    outfile.write("-" * 80 + "\n\n")

                    # Write the contents of the text file to the output file
                    outfile.write(contents)

                    # Add a line break between files
                    outfile.write("\n\n" + "=" * 80 + "\n\n")
            else:
                print("Not detected as text file. Skipping.")

            print("------------------------")

print("Concatenation complete. Output file:", output_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment