Skip to content

Instantly share code, notes, and snippets.

@tarekbadrsh
Last active July 26, 2024 07:09
Show Gist options
  • Save tarekbadrsh/e6aa6cde277cf1af9031e245b0b82d58 to your computer and use it in GitHub Desktop.
Save tarekbadrsh/e6aa6cde277cf1af9031e245b0b82d58 to your computer and use it in GitHub Desktop.
Multi-File Commenter and Combiner
import os
import fnmatch
def get_comment_prefix(filename):
    """Return the ``(opening, closing)`` comment markers for *filename*.

    The markers are chosen from the file's extension and are used by the
    caller to wrap the ``---FILE_PATH---`` header line so the header reads as
    a comment in the file's own language.

    The extension is compared case-insensitively, so ``INDEX.HTML`` and
    ``index.html`` yield the same markers. Files with an unknown extension,
    or with no extension at all, fall back to ``('#', '#')``.

    Args:
        filename: A file name or path; only its extension is inspected.

    Returns:
        A 2-tuple ``(opening_marker, closing_marker)`` of strings.
    """
    extension_to_comment = {
        '.asm': (';', ';'),
        '.awk': ('#', '#'),
        '.c': ('//', '//'),
        '.clj': (';;', ';;'),
        '.cpp': ('//', '//'),
        '.css': ('/*', '*/'),
        '.cs': ('//', '//'),
        '.dart': ('//', '//'),
        '.dockerfile': ('#', '#'),
        '.ex': ('#', '#'),
        '.erl': ('%', '%'),
        '.fs': ('//', '//'),
        '.f90': ('!', '!'),
        '.go': ('//', '//'),
        '.groovy': ('//', '//'),
        '.hs': ('--', '--'),
        '.html': ('<!--', '-->'),
        '.java': ('//', '//'),
        '.js': ('//', '//'),
        '.jl': ('#', '#'),
        '.kt': ('//', '//'),
        '.latex': ('%', '%'),
        '.lisp': (';', ';'),
        '.lua': ('--', '--'),
        '.mk': ('#', '#'),
        '.md': ('<!--', '-->'),
        '.m': ('%', '%'),
        '.mm': ('//', '//'),
        # OCaml has no line comments; block comments are written (* ... *).
        '.ml': ('(*', '*)'),
        '.pas': ('//', '//'),
        '.pl': ('#', '#'),
        '.php': ('//', '//'),
        '.plain': ('#', '#'),
        '.ps1': ('#', '#'),
        '.py': ('##', '##'),
        '.r': ('#', '#'),
        '.rb': ('#', '#'),
        '.rs': ('//', '//'),
        '.scala': ('//', '//'),
        '.scm': (';', ';'),
        '.sed': ('#', '#'),
        '.sh': ('#', '#'),
        '.st': ('"', '"'),
        '.sql': ('--', '--'),
        '.swift': ('//', '//'),
        '.ts': ('//', '//'),
        '.tsx': ('//', '//'),
        '.vb': ("'", "'"),
        '.xml': ('<!--', '-->'),
        '.yaml': ('#', '#'),
    }
    _, ext = os.path.splitext(filename)
    # All keys are lower-case; fold the extension so '.PY' or '.R' still match.
    return extension_to_comment.get(ext.lower(), ('#', '#'))
def read_gitignore(directory):
    """Return the ignore patterns to apply when walking *directory*.

    The result always starts with the built-in entries ``.git``,
    ``.gitignore`` and ``.dockerignore``. If ``<directory>/.gitignore``
    exists, every non-blank, non-comment line of it is appended with
    surrounding whitespace removed.

    Args:
        directory: Path of the directory whose ``.gitignore`` should be read.

    Returns:
        A list of pattern strings.
    """
    result = [".git", ".gitignore", ".dockerignore"]
    gitignore_path = os.path.join(directory, '.gitignore')
    try:
        with open(gitignore_path, 'r', encoding='utf-8') as gitignore_file:
            for raw_line in gitignore_file:
                # Strip first so the comment test sees the same text that
                # would be stored; otherwise an indented "# ..." line would
                # slip through as a pattern.
                line = raw_line.strip()
                if line and not line.startswith('#'):
                    result.append(line)
    except FileNotFoundError:
        # No .gitignore in this directory: only the built-in entries apply.
        pass
    return result
def should_ignore(path, ignore_patterns):
    """Return True if *path* matches any of *ignore_patterns*.

    Each pattern is a shell-style glob (``fnmatch`` syntax) and is tested
    against both the whole *path* and its final component.

    Args:
        path: Path of a file or directory.
        ignore_patterns: Iterable of glob pattern strings.

    Returns:
        True if at least one pattern matches, otherwise False.
    """
    basename = os.path.basename(path)
    for pattern in ignore_patterns:
        # .gitignore marks directory patterns with a trailing slash
        # ("node_modules/"). fnmatch would treat that slash literally and
        # never match a directory name, so drop it before comparing.
        pattern = pattern.rstrip('/')
        if not pattern:
            continue
        if fnmatch.fnmatch(path, pattern) or fnmatch.fnmatch(basename, pattern):
            return True
    return False
def read_and_save_files(directory, output_file, ignore_patterns):
    """Concatenate every non-ignored file under *directory* into *output_file*.

    Each file's content is preceded by a header line of the form
    ``<open>---FILE_PATH---<path>---FILE_PATH---<close>``, where the comment
    markers come from ``get_comment_prefix``, and followed by a newline.
    Directories and files for which ``should_ignore`` returns True are
    skipped. Files that cannot be read or decoded as UTF-8 are reported on
    stdout and skipped.

    Args:
        directory: Root directory to walk.
        output_file: Path of the combined file to create (overwritten).
        ignore_patterns: Glob patterns passed to ``should_ignore``.
    """
    output_abspath = os.path.abspath(output_file)
    with open(output_file, 'w', encoding='utf-8') as outfile:
        for root, dirs, files in os.walk(directory):
            # Prune in place so os.walk never descends into ignored directories.
            dirs[:] = [d for d in dirs if not should_ignore(
                os.path.join(root, d), ignore_patterns)]
            for filename in files:
                full_path = os.path.join(root, filename)
                # The output file may live inside the tree being walked;
                # never feed it back into itself.
                if os.path.abspath(full_path) == output_abspath:
                    continue
                if should_ignore(full_path, ignore_patterns):
                    continue
                opening, closing = get_comment_prefix(filename)
                # Read before writing anything, so a file that fails to read
                # does not leave an orphan header in the output.
                try:
                    with open(full_path, 'r', encoding='utf-8') as infile:
                        content = infile.read()
                except (OSError, UnicodeDecodeError) as e:
                    print(f"Error reading file {full_path}: {e}")
                    continue
                outfile.write(
                    f"{opening}---FILE_PATH---{full_path}---FILE_PATH---{closing}\n")
                outfile.write(content)
                outfile.write("\n")
if __name__ == "__main__":
    # An empty answer means "the current directory".
    directory = input("Enter the directory path: ").strip() or "."
    # os.path.basename() returns '' for a path ending in a separator
    # ("proj/") and '.' for ".", so take the name from the absolute path.
    project_name = os.path.basename(os.path.abspath(directory))
    output_file = os.path.join(directory, f"{project_name}_output.txt")
    # Read patterns from .gitignore if it exists
    ignore_patterns = read_gitignore(directory)
    read_and_save_files(directory, output_file, ignore_patterns)
    print(f"Output saved to {output_file}")
@tarekbadrsh
Copy link
Author

It would be better to let users control what is included or excluded by reading the patterns in .gitignore. Additionally, we should add the .git directory to the ignore list explicitly, since .gitignore itself does not list it.

@EmadAnwer
Copy link

EmadAnwer commented Jul 20, 2024

It would be better to let users control what is included or excluded by reading the patterns in .gitignore. Additionally, we should add the .git directory to the ignore list explicitly, since .gitignore itself does not list it.

I was thinking about it, but it will be tricky to implement if you need to support every .gitignore feature. I will try to implement an initial version of it.

@EmadAnwer
Copy link

@tarekbadrsh Check this

import os
import fnmatch


def get_comment_prefix(filename):
    """Return the ``(opening, closing)`` comment markers for *filename*.

    The markers are chosen from the file's extension and are used by the
    caller to wrap the ``---FILE_PATH---`` header line so the header reads as
    a comment in the file's own language.

    The extension is compared case-insensitively, so ``INDEX.HTML`` and
    ``index.html`` yield the same markers. Files with an unknown extension,
    or with no extension at all, fall back to ``("#", "#")``.

    Args:
        filename: A file name or path; only its extension is inspected.

    Returns:
        A 2-tuple ``(opening_marker, closing_marker)`` of strings.
    """
    extension_to_comment = {
        ".asm": (";", ";"),
        ".awk": ("#", "#"),
        ".c": ("//", "//"),
        ".clj": (";;", ";;"),
        ".cpp": ("//", "//"),
        ".css": ("/*", "*/"),
        ".cs": ("//", "//"),
        ".dart": ("//", "//"),
        ".dockerfile": ("#", "#"),
        ".ex": ("#", "#"),
        ".erl": ("%", "%"),
        ".fs": ("//", "//"),
        ".f90": ("!", "!"),
        ".go": ("//", "//"),
        ".groovy": ("//", "//"),
        ".hs": ("--", "--"),
        ".html": ("<!--", "-->"),
        ".java": ("//", "//"),
        ".js": ("//", "//"),
        ".jl": ("#", "#"),
        ".kt": ("//", "//"),
        ".latex": ("%", "%"),
        ".lisp": (";", ";"),
        ".lua": ("--", "--"),
        ".mk": ("#", "#"),
        ".md": ("<!--", "-->"),
        ".m": ("%", "%"),
        ".mm": ("//", "//"),
        # OCaml has no line comments; block comments are written (* ... *).
        ".ml": ("(*", "*)"),
        ".pas": ("//", "//"),
        ".pl": ("#", "#"),
        ".php": ("//", "//"),
        ".plain": ("#", "#"),
        ".ps1": ("#", "#"),
        ".py": ("##", "##"),
        ".r": ("#", "#"),
        ".rb": ("#", "#"),
        ".rs": ("//", "//"),
        ".scala": ("//", "//"),
        ".scm": (";", ";"),
        ".sed": ("#", "#"),
        ".sh": ("#", "#"),
        ".st": ('"', '"'),
        ".sql": ("--", "--"),
        ".swift": ("//", "//"),
        ".ts": ("//", "//"),
        ".tsx": ("//", "//"),
        ".vb": ("'", "'"),
        ".xml": ("<!--", "-->"),
        ".yaml": ("#", "#"),
    }

    _, ext = os.path.splitext(filename)
    # All keys are lower-case; fold the extension so ".PY" or ".R" still match.
    return extension_to_comment.get(ext.lower(), ("#", "#"))


def dot_ignore_loader(directory):
    """Collect ignore patterns from the ignore files found in *directory*.

    Reads ``.gitignore`` and ``.dockerignore`` (in that order) when they
    exist and returns every non-blank, non-comment line with surrounding
    whitespace removed. Missing files are silently skipped.

    Args:
        directory: Directory in which to look for the ignore files.

    Returns:
        A list of pattern strings, possibly empty.
    """
    # The original list also contained ".gitingore", a misspelling of
    # ".gitignore" that cannot name a real ignore file; it has been dropped.
    ignore_files = [".gitignore", ".dockerignore"]
    ignore_lines = []
    for ignore_file in ignore_files:
        ignore_file_path = os.path.join(directory, ignore_file)
        try:
            with open(ignore_file_path, "r", encoding="utf-8") as infile:
                for line in infile:
                    line = line.strip()
                    if line and not line.startswith("#"):
                        ignore_lines.append(line)
        except FileNotFoundError:
            # This ignore file is not present in the directory.
            continue
    return ignore_lines


def is_ignored(path, patterns):
    """Return True if *path* matches any of the ignore *patterns*.

    Each pattern is a shell-style glob (``fnmatch`` syntax). A pattern
    matches when it matches the whole *path* or any single component of it,
    so a pattern naming a directory also covers everything beneath that
    directory.

    Because every component is tested, *path* should be relative to the
    project root; components of an absolute prefix would be matched too.

    Args:
        path: Path of a file or directory, ideally relative to the root
            being scanned.
        patterns: Iterable of glob pattern strings.

    Returns:
        True if at least one pattern matches, otherwise False.
    """
    # "." and ".." are navigation markers, not real names; leaving them in
    # would let a pattern such as ".*" match every "./something" path.
    parts = [
        part
        for part in path.replace(os.sep, "/").split("/")
        if part not in ("", ".", "..")
    ]
    for pattern in patterns:
        # .gitignore marks directory patterns with a trailing slash
        # ("node_modules/"); fnmatch would treat the slash literally.
        pattern = pattern.rstrip("/")
        if not pattern:
            continue
        if fnmatch.fnmatch(path, pattern) or any(
            fnmatch.fnmatch(part, pattern) for part in parts
        ):
            return True
    return False


def read_and_save_files(directory, output_file):
    """Concatenate every non-ignored file under *directory* into *output_file*.

    Ignore patterns come from ``dot_ignore_loader(directory)`` plus a small
    built-in list. Each included file is written as a header line of the
    form ``<open>---FILE_PATH---<path>---FILE_PATH---<close>`` (comment
    markers from ``get_comment_prefix``), then the file's content, then a
    newline. Any file whose name equals the output file's name is skipped.
    Files that cannot be read or decoded as UTF-8 are reported on stdout and
    skipped instead of aborting the run.

    Args:
        directory: Root directory to walk.
        output_file: Path of the combined file to create (overwritten).
    """
    ignore_patterns = dot_ignore_loader(directory)

    # add additional ignore patterns here, in the same style as .gitignore patterns
    additional_ignore_patterns = [
        ".git*",
        ".vscode*",
        ".gitignore",
    ]
    ignore_patterns.extend(additional_ignore_patterns)
    output_name = os.path.basename(output_file)
    with open(output_file, "w", encoding="utf-8") as outfile:
        for root, dirs, files in os.walk(directory):
            # Prune ignored directories in place so os.walk never descends
            # into them (e.g. a nested ".git" or "node_modules").
            dirs[:] = [
                d
                for d in dirs
                if not is_ignored(
                    os.path.relpath(os.path.join(root, d), directory),
                    ignore_patterns,
                )
            ]
            for filename in files:
                full_path = os.path.join(root, filename)
                rel_path = os.path.relpath(full_path, directory)
                if is_ignored(rel_path, ignore_patterns):
                    continue
                if filename == output_name:
                    continue
                comment_prefix = get_comment_prefix(filename)
                # Read before writing anything, so a binary or unreadable
                # file neither aborts the run nor leaves an orphan header.
                try:
                    with open(full_path, "r", encoding="utf-8") as infile:
                        content = infile.read()
                except (OSError, UnicodeDecodeError) as e:
                    print(f"Error reading file {full_path}: {e}")
                    continue
                outfile.write(
                    f"{comment_prefix[0]}---FILE_PATH---{full_path}---FILE_PATH---{comment_prefix[1]}\n"
                )
                outfile.write(content)
                outfile.write("\n")


if __name__ == "__main__":
    # An empty answer means "the current directory".
    directory = input("Enter the directory path: ").strip() or "."
    # os.path.basename() returns "" for a path ending in a separator
    # ("proj/") and "." for ".", so take the name from the absolute path.
    project_name = os.path.basename(os.path.abspath(directory))
    output_file = os.path.join(directory, f"{project_name}_output.txt")
    read_and_save_files(directory, output_file)

@tarekbadrsh
Copy link
Author

Thanks, @EmadAnwer, for your suggestions on using .gitignore in the script.
I have made some updates to your code, and I hope they are helpful for everyone.

Cheers 🍻

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment