-
-
Save tarekbadrsh/e6aa6cde277cf1af9031e245b0b82d58 to your computer and use it in GitHub Desktop.
import os | |
import fnmatch | |
def get_comment_prefix(filename):
    """Return the (opening, closing) comment markers for *filename*.

    The markers are chosen from the file's extension and are used to wrap
    the ``---FILE_PATH---`` header line so that it reads as a comment in
    the file's own language.

    Args:
        filename: File name or path; only its extension is inspected.

    Returns:
        A 2-tuple ``(opening, closing)``. Unknown or missing extensions
        fall back to ``('#', '#')``.
    """
    extension_to_comment = {
        '.asm': (';', ';'),
        '.awk': ('#', '#'),
        '.c': ('//', '//'),
        '.clj': (';;', ';;'),
        '.cpp': ('//', '//'),
        '.css': ('/*', '*/'),
        '.cs': ('//', '//'),
        '.dart': ('//', '//'),
        '.dockerfile': ('#', '#'),
        '.ex': ('#', '#'),
        '.erl': ('%', '%'),
        '.fs': ('//', '//'),
        '.f90': ('!', '!'),
        '.go': ('//', '//'),
        '.groovy': ('//', '//'),
        '.hs': ('--', '--'),
        '.html': ('<!--', '-->'),
        '.java': ('//', '//'),
        '.js': ('//', '//'),
        '.jl': ('#', '#'),
        '.kt': ('//', '//'),
        '.latex': ('%', '%'),
        '.lisp': (';', ';'),
        '.lua': ('--', '--'),
        '.mk': ('#', '#'),
        '.md': ('<!--', '-->'),
        '.m': ('%', '%'),
        '.mm': ('//', '//'),
        # OCaml has no line comments; block comments are (* ... *).
        '.ml': ('(*', '*)'),
        '.pas': ('//', '//'),
        '.pl': ('#', '#'),
        '.php': ('//', '//'),
        '.plain': ('#', '#'),
        '.ps1': ('#', '#'),
        '.py': ('##', '##'),
        '.r': ('#', '#'),
        '.rb': ('#', '#'),
        '.rs': ('//', '//'),
        '.scala': ('//', '//'),
        '.scm': (';', ';'),
        '.sed': ('#', '#'),
        '.sh': ('#', '#'),
        '.st': ('"', '"'),
        '.sql': ('--', '--'),
        '.swift': ('//', '//'),
        '.ts': ('//', '//'),
        '.tsx': ('//', '//'),
        '.vb': ("'", "'"),
        '.xml': ('<!--', '-->'),
        '.yaml': ('#', '#'),
    }
    _, ext = os.path.splitext(filename)
    # All keys are lowercase; fold the extension so "MAIN.PY" or
    # "INDEX.HTML" resolve to the same markers as their lowercase forms.
    return extension_to_comment.get(ext.lower(), ('#', '#'))
def read_gitignore(directory):
    """Build the list of ignore patterns for *directory*.

    The list always starts with ``.git``, ``.gitignore`` and
    ``.dockerignore``. If ``<directory>/.gitignore`` exists, its patterns
    are appended after light normalisation so that they can be used with
    ``fnmatch``.

    Args:
        directory: Directory that may contain a ``.gitignore`` file.

    Returns:
        A list of glob patterns (strings).
    """
    result = [".git", ".gitignore", ".dockerignore"]
    gitignore_path = os.path.join(directory, '.gitignore')
    if not os.path.exists(gitignore_path):
        return result

    with open(gitignore_path, 'r', encoding='utf-8',
              errors='replace') as gitignore_file:
        for raw_line in gitignore_file:
            pattern = raw_line.strip()
            # Skip blank lines and comments (checked after stripping so
            # indented comments are not mistaken for patterns).
            if not pattern or pattern.startswith('#'):
                continue
            # "!pattern" re-includes files in git; fnmatch cannot express
            # that, and treating it as an ignore pattern would be wrong.
            if pattern.startswith('!'):
                continue
            # "build/" means "the directory build"; names produced by
            # os.walk never end in "/", so drop the trailing slash.
            pattern = pattern.rstrip('/')
            if pattern:
                result.append(pattern)
    return result
def should_ignore(path, ignore_patterns):
    """Return True if *path* matches any of *ignore_patterns*.

    Each pattern is tested with ``fnmatch`` against both the whole path
    and its final component.

    Args:
        path: File or directory path to test.
        ignore_patterns: Iterable of glob patterns.

    Returns:
        True if at least one pattern matches, otherwise False.
    """
    basename = os.path.basename(path)
    for pattern in ignore_patterns:
        # .gitignore-style directory patterns ("build/") carry a trailing
        # slash that would otherwise prevent any match.
        pattern = pattern.rstrip('/')
        if not pattern:
            continue
        if fnmatch.fnmatch(path, pattern) or fnmatch.fnmatch(basename, pattern):
            return True
    return False
def read_and_save_files(directory, output_file, ignore_patterns):
    """Concatenate every non-ignored file under *directory* into *output_file*.

    Each file's content is preceded by a header line of the form
    ``<open>---FILE_PATH---<path>---FILE_PATH---<close>`` using the comment
    markers returned by ``get_comment_prefix``.

    Args:
        directory: Root directory to walk.
        output_file: Path of the combined file to (over)write.
        ignore_patterns: Glob patterns passed to ``should_ignore``.
    """
    output_abspath = os.path.abspath(output_file)
    with open(output_file, 'w', encoding='utf-8') as outfile:
        for root, dirs, files in os.walk(directory):
            # Prune in place so os.walk never descends into ignored
            # directories.
            dirs[:] = [d for d in dirs if not should_ignore(
                os.path.join(root, d), ignore_patterns)]
            for filename in files:
                full_path = os.path.join(root, filename)
                # The output file usually lives inside `directory`; reading
                # it while it is being written would copy it into itself.
                if os.path.abspath(full_path) == output_abspath:
                    continue
                if should_ignore(full_path, ignore_patterns):
                    continue
                comment_prefix = get_comment_prefix(filename)
                # Read first and write afterwards, so a file that cannot be
                # read (binary, permissions) leaves no orphan header behind.
                try:
                    with open(full_path, 'r', encoding='utf-8') as infile:
                        content = infile.read()
                except (OSError, UnicodeDecodeError) as e:
                    print(f"Error reading file {full_path}: {e}")
                    continue
                outfile.write(
                    f"{comment_prefix[0]}---FILE_PATH---{full_path}---FILE_PATH---{comment_prefix[1]}\n")
                outfile.write(content)
                outfile.write("\n")
if __name__ == "__main__":
    # Script entry point: ask for a directory and write
    # "<directory name>_output.txt" inside it.
    directory = input("Enter the directory path: ").strip()
    if not os.path.isdir(directory):
        raise SystemExit(f"Not a directory: {directory!r}")
    # abspath() drops a trailing slash and resolves "." so that the
    # basename is the real directory name rather than "" or ".".
    directory_name = os.path.basename(os.path.abspath(directory))
    output_file = os.path.join(directory, f"{directory_name}_output.txt")
    # Read patterns from .gitignore if it exists
    ignore_patterns = read_gitignore(directory)
    read_and_save_files(directory, output_file, ignore_patterns)
    print(f"Output saved to {output_file}")
You can ignore directories or files by editing the `ignore_dirs` and `ignore_files` lists:
def read_and_save_files(directory, output_file):
    """Concatenate the files under *directory* into *output_file*.

    Directories whose name contains an entry of ``ignore_dirs`` are not
    descended into, and files whose name contains an entry of
    ``ignore_files`` are skipped. Each included file is preceded by a
    ``---FILE_PATH---`` header wrapped in the comment markers returned by
    ``get_comment_prefix``.

    Args:
        directory: Root directory to walk.
        output_file: Path of the combined file to (over)write.
    """
    ignore_dirs = ["__pycache__", "venv", ".git"]
    ignore_files = [
        "collect_all_files.py",
        "README.md",
        "LICENSE",
        ".gitignore",
        ".git",
        ".vscode",
        ".idea",
        ".dockerignore",
        # NOTE(review): looks like a misspelling of ".gitignore"; kept so
        # the set of skipped names is unchanged.
        ".gitingore",
    ]
    output_name = os.path.basename(output_file)
    with open(output_file, "w", encoding="utf-8") as outfile:
        for root, dirs, files in os.walk(directory):
            # Match on each directory *name* and prune in place. Matching on
            # the whole `root` path would also react to ancestors of
            # `directory` (e.g. ".../venv_tools/project") and skip everything.
            dirs[:] = [
                d for d in dirs
                if not any(ignore_dir in d for ignore_dir in ignore_dirs)
            ]
            for filename in files:
                if any(ignore_file in filename for ignore_file in ignore_files):
                    continue
                if filename == output_name:
                    continue
                full_path = os.path.join(root, filename)
                comment_prefix = get_comment_prefix(filename)
                # Read before writing so that one unreadable or binary file
                # neither aborts the run nor leaves an orphan header.
                try:
                    with open(full_path, "r", encoding="utf-8") as infile:
                        content = infile.read()
                except (OSError, UnicodeDecodeError) as exc:
                    print(f"Error reading file {full_path}: {exc}")
                    continue
                outfile.write(
                    f"{comment_prefix[0]}---FILE_PATH---{full_path}---FILE_PATH---{comment_prefix[1]}\n"
                )
                outfile.write(content)
                outfile.write("\n")
It would be better to let users specify what to include or exclude by using the contents of `.gitignore`. Additionally, we should consider adding the `.git` directory to the ignore list, since it is not ignored by `.gitignore`.
It would be better to let users specify what to include or exclude by using the contents of `.gitignore`. Additionally, we should consider adding the `.git` directory to the ignore list, since it is not ignored by `.gitignore`.
I was thinking about it, but it will be tricky to implement if you need to support all the `.gitignore` hacks. I will try to implement an initial version of it.
@tarekbadrsh Check this
import os
import fnmatch
def get_comment_prefix(filename):
    """Return the (opening, closing) comment markers for *filename*.

    The markers are chosen from the file's extension and are used to wrap
    the ``---FILE_PATH---`` header line so that it reads as a comment in
    the file's own language.

    Args:
        filename: File name or path; only its extension is inspected.

    Returns:
        A 2-tuple ``(opening, closing)``. Unknown or missing extensions
        fall back to ``("#", "#")``.
    """
    extension_to_comment = {
        ".asm": (";", ";"),
        ".awk": ("#", "#"),
        ".c": ("//", "//"),
        ".clj": (";;", ";;"),
        ".cpp": ("//", "//"),
        ".css": ("/*", "*/"),
        ".cs": ("//", "//"),
        ".dart": ("//", "//"),
        ".dockerfile": ("#", "#"),
        ".ex": ("#", "#"),
        ".erl": ("%", "%"),
        ".fs": ("//", "//"),
        ".f90": ("!", "!"),
        ".go": ("//", "//"),
        ".groovy": ("//", "//"),
        ".hs": ("--", "--"),
        ".html": ("<!--", "-->"),
        ".java": ("//", "//"),
        ".js": ("//", "//"),
        ".jl": ("#", "#"),
        ".kt": ("//", "//"),
        ".latex": ("%", "%"),
        ".lisp": (";", ";"),
        ".lua": ("--", "--"),
        ".mk": ("#", "#"),
        ".md": ("<!--", "-->"),
        ".m": ("%", "%"),
        ".mm": ("//", "//"),
        # OCaml has no line comments; block comments are (* ... *).
        ".ml": ("(*", "*)"),
        ".pas": ("//", "//"),
        ".pl": ("#", "#"),
        ".php": ("//", "//"),
        ".plain": ("#", "#"),
        ".ps1": ("#", "#"),
        ".py": ("##", "##"),
        ".r": ("#", "#"),
        ".rb": ("#", "#"),
        ".rs": ("//", "//"),
        ".scala": ("//", "//"),
        ".scm": (";", ";"),
        ".sed": ("#", "#"),
        ".sh": ("#", "#"),
        ".st": ('"', '"'),
        ".sql": ("--", "--"),
        ".swift": ("//", "//"),
        ".ts": ("//", "//"),
        ".tsx": ("//", "//"),
        ".vb": ("'", "'"),
        ".xml": ("<!--", "-->"),
        ".yaml": ("#", "#"),
    }
    _, ext = os.path.splitext(filename)
    # All keys are lowercase; fold the extension so "MAIN.PY" or
    # "INDEX.HTML" resolve to the same markers as their lowercase forms.
    return extension_to_comment.get(ext.lower(), ("#", "#"))
def dot_ignore_loader(directory):
    """Collect ignore patterns from the dot-ignore files in *directory*.

    Reads ``.gitignore``, ``.dockerignore`` and ``.gitingore`` (when
    present) and returns their patterns, lightly normalised so that they
    can be used with ``fnmatch``.

    Args:
        directory: Directory that may contain the ignore files.

    Returns:
        A list of glob patterns in file order; empty if no file exists.
    """
    # NOTE(review): ".gitingore" looks like a misspelling of ".gitignore";
    # kept so that the set of files consulted is unchanged.
    ignore_files = [".gitignore", ".dockerignore", ".gitingore"]
    ignore_lines = []
    for ignore_file in ignore_files:
        ignore_file_path = os.path.join(directory, ignore_file)
        if not os.path.exists(ignore_file_path):
            continue
        with open(ignore_file_path, "r", encoding="utf-8",
                  errors="replace") as infile:
            for line in infile:
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                # "!pattern" re-includes files in git; fnmatch cannot
                # express that, and using it as an ignore pattern is wrong.
                if line.startswith("!"):
                    continue
                # "build/" means "the directory build"; path components
                # never end in "/", so drop the trailing slash.
                line = line.rstrip("/")
                if line:
                    ignore_lines.append(line)
    return ignore_lines
def is_ignored(path, patterns):
    """Return True if *path* matches any of the ignore *patterns*.

    A pattern matches when ``fnmatch`` accepts the whole path or any single
    component of it, so a directory pattern such as ``node_modules`` also
    ignores every file below that directory.

    Args:
        path: Path to test. It should be relative to the scanned root so
            that directories above the root cannot trigger a match.
        patterns: Iterable of glob patterns.

    Returns:
        True if at least one pattern matches, otherwise False.
    """
    components = [part for part in path.replace(os.sep, "/").split("/") if part]
    for pattern in patterns:
        # .gitignore-style directory patterns ("build/") carry a trailing
        # slash that would otherwise prevent any match.
        pattern = pattern.rstrip("/")
        if not pattern:
            continue
        if fnmatch.fnmatch(path, pattern):
            return True
        if any(fnmatch.fnmatch(part, pattern) for part in components):
            return True
    return False
def read_and_save_files(directory, output_file):
    """Concatenate every non-ignored file under *directory* into *output_file*.

    Ignore patterns come from ``dot_ignore_loader(directory)`` plus a few
    built-in ones. Each included file is preceded by a ``---FILE_PATH---``
    header wrapped in the comment markers returned by
    ``get_comment_prefix``.

    Args:
        directory: Root directory to walk.
        output_file: Path of the combined file to (over)write.
    """
    ignore_patterns = dot_ignore_loader(directory)
    # add additional ignore patterns here like .gitingore patterns
    additional_ignore_patterns = [
        ".git*",
        ".vscode*",
        ".gitignore",
    ]
    ignore_patterns.extend(additional_ignore_patterns)
    output_name = os.path.basename(output_file)
    with open(output_file, "w", encoding="utf-8") as outfile:
        for root, dirs, files in os.walk(directory):
            # Prune ignored directories in place so os.walk never descends
            # into them and their files are never collected.
            dirs[:] = [
                d for d in dirs
                if not is_ignored(
                    os.path.relpath(os.path.join(root, d), directory),
                    ignore_patterns,
                )
            ]
            for filename in files:
                full_path = os.path.join(root, filename)
                rel_path = os.path.relpath(full_path, directory)
                if is_ignored(rel_path, ignore_patterns):
                    continue
                if filename == output_name:
                    continue
                comment_prefix = get_comment_prefix(filename)
                # Read before writing so that one unreadable or binary file
                # neither aborts the run nor leaves an orphan header.
                try:
                    with open(full_path, "r", encoding="utf-8") as infile:
                        content = infile.read()
                except (OSError, UnicodeDecodeError) as exc:
                    print(f"Error reading file {full_path}: {exc}")
                    continue
                outfile.write(
                    f"{comment_prefix[0]}---FILE_PATH---{full_path}---FILE_PATH---{comment_prefix[1]}\n"
                )
                outfile.write(content)
                outfile.write("\n")
if __name__ == "__main__":
    # Script entry point: ask for a directory and write
    # "<directory name>_output.txt" inside it.
    directory = input("Enter the directory path: ").strip()
    if not os.path.isdir(directory):
        raise SystemExit(f"Not a directory: {directory!r}")
    # abspath() drops a trailing slash and resolves "." so that the
    # basename is the real directory name rather than "" or ".".
    directory_name = os.path.basename(os.path.abspath(directory))
    output_file = os.path.join(directory, f"{directory_name}_output.txt")
    read_and_save_files(directory, output_file)
Thanks, @EmadAnwer, for your suggestions on using `.gitignore` in the script.
I have made some updates to your code and hope they are helpful for everyone.
Cheers 🍻
The output file `/path/to/your/directory/directory_name_output.txt` will contain the contents of every collected file, each preceded by a `---FILE_PATH---` header line giving the file's path.

This script is useful for code review, backup, or documentation purposes where combining multiple files into one with clear annotations is beneficial.