-
-
Save tarekbadrsh/e6aa6cde277cf1af9031e245b0b82d58 to your computer and use it in GitHub Desktop.
| import os | |
| import fnmatch | |
def get_comment_prefix(filename):
    """Return the ``(opening, closing)`` comment markers for *filename*.

    The markers are looked up by file extension. The extension is compared
    case-insensitively, so ``MAIN.C`` is treated like ``main.c``. Files with
    an unknown or missing extension fall back to ``('#', '#')``.
    """
    extension_to_comment = {
        '.asm': (';', ';'),
        '.awk': ('#', '#'),
        '.c': ('//', '//'),
        '.clj': (';;', ';;'),
        '.cpp': ('//', '//'),
        '.css': ('/*', '*/'),
        '.cs': ('//', '//'),
        '.dart': ('//', '//'),
        '.dockerfile': ('#', '#'),
        '.ex': ('#', '#'),
        '.erl': ('%', '%'),
        '.fs': ('//', '//'),
        '.f90': ('!', '!'),
        '.go': ('//', '//'),
        '.groovy': ('//', '//'),
        '.h': ('//', '//'),
        '.hpp': ('//', '//'),
        '.hs': ('--', '--'),
        '.html': ('<!--', '-->'),
        '.java': ('//', '//'),
        '.js': ('//', '//'),
        '.jsx': ('//', '//'),
        '.jl': ('#', '#'),
        '.kt': ('//', '//'),
        '.latex': ('%', '%'),
        '.lisp': (';', ';'),
        '.lua': ('--', '--'),
        '.mk': ('#', '#'),
        '.md': ('<!--', '-->'),
        '.m': ('%', '%'),
        '.mm': ('//', '//'),
        # OCaml has no line comments; its only comment form is (* ... *).
        '.ml': ('(*', '*)'),
        '.pas': ('//', '//'),
        '.pl': ('#', '#'),
        '.php': ('//', '//'),
        '.plain': ('#', '#'),
        '.ps1': ('#', '#'),
        '.py': ('##', '##'),
        '.r': ('#', '#'),
        '.rb': ('#', '#'),
        '.rs': ('//', '//'),
        '.scala': ('//', '//'),
        '.scm': (';', ';'),
        '.sed': ('#', '#'),
        '.sh': ('#', '#'),
        '.st': ('"', '"'),
        '.sql': ('--', '--'),
        '.swift': ('//', '//'),
        '.ts': ('//', '//'),
        '.tsx': ('//', '//'),
        '.vb': ("'", "'"),
        '.xml': ('<!--', '-->'),
        '.yaml': ('#', '#'),
    }
    _, ext = os.path.splitext(filename)
    # Lower-case the extension so that e.g. "INDEX.HTML" is recognised.
    return extension_to_comment.get(ext.lower(), ('#', '#'))
def read_gitignore(directory):
    """Collect the ignore patterns that apply to *directory*.

    The result always starts with the built-in entries ``.git``,
    ``.gitignore`` and ``.dockerignore``. When ``<directory>/.gitignore``
    exists, its patterns are appended in file order. Blank lines and comment
    lines (first non-blank character ``#``) are skipped.

    A trailing ``/`` is removed from each pattern: the patterns are later
    compared against names that never end in a separator, so ``build/`` would
    otherwise never match the ``build`` directory.
    """
    result = [".git", ".gitignore", ".dockerignore"]
    gitignore_path = os.path.join(directory, '.gitignore')
    if not os.path.isfile(gitignore_path):
        return result
    with open(gitignore_path, 'r', encoding='utf-8',
              errors='replace') as gitignore_file:
        for raw_line in gitignore_file:
            pattern = raw_line.strip()
            # Test the stripped text so indented comments are skipped too.
            if not pattern or pattern.startswith('#'):
                continue
            pattern = pattern.rstrip('/')
            if pattern:
                result.append(pattern)
    return result
def should_ignore(path, ignore_patterns):
    """Return True when *path* matches any of *ignore_patterns*.

    Each pattern is a shell-style glob (``fnmatch`` syntax). It is tested
    against the whole *path* and against its final component. A trailing
    ``/`` on a pattern is ignored, so the directory pattern ``build/``
    matches a path whose last component is ``build``.
    """
    name = os.path.basename(path)
    for pattern in ignore_patterns:
        pattern = pattern.rstrip('/')
        if not pattern:
            continue
        if fnmatch.fnmatch(path, pattern) or fnmatch.fnmatch(name, pattern):
            return True
    return False
def read_and_save_files(directory, output_file, ignore_patterns):
    """Concatenate the files under *directory* into *output_file*.

    The tree is walked top-down. Directories and files for which
    ``should_ignore`` returns True are skipped, and ignored directories are
    not descended into. Each collected file is written as a header line
    containing its path, wrapped in the comment markers returned by
    ``get_comment_prefix``, followed by the file's text and a newline.

    Files that cannot be read or decoded as UTF-8 are reported on stdout and
    skipped. The output file itself is never collected, even when it lives
    inside *directory*.
    """
    output_abs = os.path.abspath(output_file)
    with open(output_file, 'w', encoding='utf-8') as outfile:
        for root, dirs, files in os.walk(directory):
            # Assigning to dirs[:] prunes the walk in place; sorting makes
            # the order of the output deterministic.
            dirs[:] = sorted(
                d for d in dirs
                if not should_ignore(os.path.join(root, d), ignore_patterns))
            for filename in sorted(files):
                full_path = os.path.join(root, filename)
                # Never feed the file being written back into itself.
                if os.path.abspath(full_path) == output_abs:
                    continue
                if should_ignore(full_path, ignore_patterns):
                    continue
                # Read before writing the header so that an unreadable file
                # does not leave a header with no content behind it.
                try:
                    with open(full_path, 'r', encoding='utf-8') as infile:
                        content = infile.read()
                except (OSError, UnicodeDecodeError) as e:
                    print(f"Error reading file {full_path}: {e}")
                    continue
                comment_prefix = get_comment_prefix(filename)
                outfile.write(
                    f"{comment_prefix[0]}---FILE_PATH---{full_path}---FILE_PATH---{comment_prefix[1]}\n")
                outfile.write(content)
                outfile.write("\n")
if __name__ == "__main__":
    # normpath drops a trailing separator; without it basename() of
    # "/some/dir/" is empty and the output would be named "_output.txt".
    directory = os.path.normpath(input("Enter the directory path: ").strip())
    if not os.path.isdir(directory):
        raise SystemExit(f"Not a directory: {directory}")
    # abspath gives relative inputs such as "." a meaningful base name.
    output_file = os.path.join(
        directory, f"{os.path.basename(os.path.abspath(directory))}_output.txt")
    # Read patterns from .gitignore if it exists
    ignore_patterns = read_gitignore(directory)
    read_and_save_files(directory, output_file, ignore_patterns)
    print(f"Output saved to {output_file}")
You can ignore directories or files by editing the `ignore_dirs` and `ignore_files` lists:
def read_and_save_files(directory, output_file):
    """Concatenate the files under *directory* into *output_file*.

    Directories named in ``ignore_dirs`` are not descended into, and files
    named in ``ignore_files`` are skipped. Both lists are matched by exact
    name, not by substring, so ``venv`` does not exclude ``convention/`` and
    ``LICENSE`` does not exclude ``LICENSE_checker.py``. Any file named like
    the output file is skipped as well.

    Each collected file is written as a header line containing its path,
    wrapped in the comment markers returned by ``get_comment_prefix``,
    followed by the file's text and a newline. Files that cannot be read or
    decoded as UTF-8 are reported on stdout and skipped.
    """
    # Edit these two lists to change what is left out of the output.
    ignore_dirs = ["__pycache__", "venv", ".git", ".vscode", ".idea"]
    ignore_files = [
        "collect_all_files.py",
        "README.md",
        "LICENSE",
        ".gitignore",
        ".dockerignore",
    ]
    output_name = os.path.basename(output_file)
    with open(output_file, "w", encoding="utf-8") as outfile:
        for root, dirs, files in os.walk(directory):
            # Assigning to dirs[:] stops os.walk from entering ignored
            # directories; sorting keeps the output order deterministic.
            dirs[:] = sorted(d for d in dirs if d not in ignore_dirs)
            for filename in sorted(files):
                if filename in ignore_files:
                    continue
                if filename == output_name:
                    continue
                full_path = os.path.join(root, filename)
                # Read first so that a binary or unreadable file is skipped
                # instead of aborting the whole run.
                try:
                    with open(full_path, "r", encoding="utf-8") as infile:
                        content = infile.read()
                except (OSError, UnicodeDecodeError) as e:
                    print(f"Error reading file {full_path}: {e}")
                    continue
                comment_prefix = get_comment_prefix(filename)
                outfile.write(
                    f"{comment_prefix[0]}---FILE_PATH---{full_path}---FILE_PATH---{comment_prefix[1]}\n"
                )
                outfile.write(content)
                outfile.write("\n")
It would be better if we allow users to specify what to include or exclude by using the content of .gitignore. Additionally, we should consider adding the .git directory, since it is not ignored by .gitignore.
It would be better if we allow users to specify what to include or exclude by using the content of
`.gitignore`. Additionally, we should consider adding the `.git` directory, since it is not ignored by `.gitignore`.
I was thinking of it, but it will be tricky to implement if you need to support all the .gitignore hacks. I will try to implement an initial version of it.
@tarekbadrsh Check this
import os
import fnmatch
def get_comment_prefix(filename):
    """Return the ``(opening, closing)`` comment markers for *filename*.

    The markers are looked up by file extension. The extension is compared
    case-insensitively, so ``MAIN.C`` is treated like ``main.c``. Files with
    an unknown or missing extension fall back to ``("#", "#")``.
    """
    extension_to_comment = {
        ".asm": (";", ";"),
        ".awk": ("#", "#"),
        ".c": ("//", "//"),
        ".clj": (";;", ";;"),
        ".cpp": ("//", "//"),
        ".css": ("/*", "*/"),
        ".cs": ("//", "//"),
        ".dart": ("//", "//"),
        ".dockerfile": ("#", "#"),
        ".ex": ("#", "#"),
        ".erl": ("%", "%"),
        ".fs": ("//", "//"),
        ".f90": ("!", "!"),
        ".go": ("//", "//"),
        ".groovy": ("//", "//"),
        ".h": ("//", "//"),
        ".hpp": ("//", "//"),
        ".hs": ("--", "--"),
        ".html": ("<!--", "-->"),
        ".java": ("//", "//"),
        ".js": ("//", "//"),
        ".jsx": ("//", "//"),
        ".jl": ("#", "#"),
        ".kt": ("//", "//"),
        ".latex": ("%", "%"),
        ".lisp": (";", ";"),
        ".lua": ("--", "--"),
        ".mk": ("#", "#"),
        ".md": ("<!--", "-->"),
        ".m": ("%", "%"),
        ".mm": ("//", "//"),
        # OCaml has no line comments; its only comment form is (* ... *).
        ".ml": ("(*", "*)"),
        ".pas": ("//", "//"),
        ".pl": ("#", "#"),
        ".php": ("//", "//"),
        ".plain": ("#", "#"),
        ".ps1": ("#", "#"),
        ".py": ("##", "##"),
        ".r": ("#", "#"),
        ".rb": ("#", "#"),
        ".rs": ("//", "//"),
        ".scala": ("//", "//"),
        ".scm": (";", ";"),
        ".sed": ("#", "#"),
        ".sh": ("#", "#"),
        ".st": ('"', '"'),
        ".sql": ("--", "--"),
        ".swift": ("//", "//"),
        ".ts": ("//", "//"),
        ".tsx": ("//", "//"),
        ".vb": ("'", "'"),
        ".xml": ("<!--", "-->"),
        ".yaml": ("#", "#"),
    }
    _, ext = os.path.splitext(filename)
    # Lower-case the extension so that e.g. "INDEX.HTML" is recognised.
    return extension_to_comment.get(ext.lower(), ("#", "#"))
def dot_ignore_loader(directory):
    """Load ignore patterns from the dot-ignore files in *directory*.

    ``.gitignore`` and ``.dockerignore`` are read in that order when they
    exist, and their patterns are returned as one list. Blank lines and
    comment lines (first non-blank character ``#``) are skipped.

    A trailing ``/`` is removed from each pattern: the patterns are later
    compared against names that never end in a separator, so ``build/`` would
    otherwise never match anything.
    """
    ignore_files = [".gitignore", ".dockerignore"]
    ignore_lines = []
    for ignore_file in ignore_files:
        ignore_file_path = os.path.join(directory, ignore_file)
        if not os.path.isfile(ignore_file_path):
            continue
        with open(ignore_file_path, "r", encoding="utf-8",
                  errors="replace") as infile:
            for line in infile:
                pattern = line.strip()
                if not pattern or pattern.startswith("#"):
                    continue
                pattern = pattern.rstrip("/")
                if pattern:
                    ignore_lines.append(pattern)
    return ignore_lines
def is_ignored(path, patterns):
    """Return True when *path* matches any of *patterns*.

    Each pattern is a shell-style glob (``fnmatch`` syntax). It is tested
    against the whole *path* and against every component of it, so a
    directory pattern such as ``node_modules`` also excludes
    ``node_modules/pkg/index.js``. A trailing ``/`` on a pattern is ignored.
    """
    # "." and ".." are navigation markers, not names; leaving them in would
    # let a pattern like ".*" match every path that starts with "./".
    components = [
        part
        for part in path.replace(os.sep, "/").split("/")
        if part not in ("", ".", "..")
    ]
    for pattern in patterns:
        pattern = pattern.rstrip("/")
        if not pattern:
            continue
        if fnmatch.fnmatch(path, pattern):
            return True
        if any(fnmatch.fnmatch(part, pattern) for part in components):
            return True
    return False
def read_and_save_files(directory, output_file):
    """Concatenate the files under *directory* into *output_file*.

    Ignore patterns come from ``dot_ignore_loader(directory)`` plus the
    built-in ``additional_ignore_patterns``. Paths are tested relative to
    *directory* with ``is_ignored``. Ignored directories are not descended
    into, and any file named like the output file is skipped.

    Each collected file is written as a header line containing its path,
    wrapped in the comment markers returned by ``get_comment_prefix``,
    followed by the file's text and a newline. Files that cannot be read or
    decoded as UTF-8 are reported on stdout and skipped.
    """
    ignore_patterns = dot_ignore_loader(directory)
    # Add additional ignore patterns here, written like .gitignore patterns.
    additional_ignore_patterns = [
        ".git*",
        ".vscode*",
        ".gitignore",
    ]
    ignore_patterns.extend(additional_ignore_patterns)
    output_name = os.path.basename(output_file)
    with open(output_file, "w", encoding="utf-8") as outfile:
        for root, dirs, files in os.walk(directory):
            # Assigning to dirs[:] stops os.walk from entering ignored
            # directories (e.g. .git); sorting keeps the output order stable.
            dirs[:] = sorted(
                d
                for d in dirs
                if not is_ignored(
                    os.path.relpath(os.path.join(root, d), directory),
                    ignore_patterns,
                )
            )
            for filename in sorted(files):
                full_path = os.path.join(root, filename)
                rel_path = os.path.relpath(full_path, directory)
                if is_ignored(rel_path, ignore_patterns):
                    continue
                if filename == output_name:
                    continue
                # Read first so that a binary or unreadable file is skipped
                # instead of aborting the whole run.
                try:
                    with open(full_path, "r", encoding="utf-8") as infile:
                        content = infile.read()
                except (OSError, UnicodeDecodeError) as e:
                    print(f"Error reading file {full_path}: {e}")
                    continue
                comment_prefix = get_comment_prefix(filename)
                outfile.write(
                    f"{comment_prefix[0]}---FILE_PATH---{full_path}---FILE_PATH---{comment_prefix[1]}\n"
                )
                outfile.write(content)
                outfile.write("\n")
if __name__ == "__main__":
    # normpath drops a trailing separator; without it basename() of
    # "/some/dir/" is empty and the output would be named "_output.txt".
    directory = os.path.normpath(input("Enter the directory path: ").strip())
    if not os.path.isdir(directory):
        raise SystemExit(f"Not a directory: {directory}")
    # abspath gives relative inputs such as "." a meaningful base name.
    output_file = os.path.join(
        directory, f"{os.path.basename(os.path.abspath(directory))}_output.txt"
    )
    read_and_save_files(directory, output_file)
Thanks, @EmadAnwer, for your suggestion to use .gitignore in the script.
I have made some updates to your code and hope they are helpful for everyone.
Cheers 🍻
The output file
`/path/to/your/directory/directory_name_output.txt` will contain the text of every collected file, each preceded by a `---FILE_PATH---` header line that gives its path. This script is useful for code review, backup, or documentation purposes where combining multiple files into one with clear annotations is beneficial.