Created
May 16, 2024 18:05
-
-
Save zeroasterisk/2a043f988c2bc3f72e8373a2c2407811 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import fnmatch | |
import os | |
import re | |
class CodeToMarkdown:
    """Collect source files from one or more folders into one Markdown file.

    Each selected file becomes a ``## File: <path>`` heading followed by a
    fenced code block whose language tag is the file extension.

    Patterns (both exclusion and inclusion) may be either:

    * ``str`` -- matched with ``fnmatch.fnmatch`` against the entry's path as
      produced by ``os.scandir`` (the folder joined with the entry name), or
    * ``re.Pattern`` -- matched with ``pattern.search`` against that same path.

    Patterns of any other type are ignored.
    """

    def __init__(self, output_filename, src_folders, exclusion_patterns, inclusion_patterns):
        """Store the configuration.

        Args:
            output_filename: Path of the Markdown file to write.
            src_folders: Iterable of folder paths to walk recursively.
            exclusion_patterns: Patterns; a match skips the file, or the whole
                directory when the matching entry is a directory.
            inclusion_patterns: Patterns; a file is emitted only if it matches
                at least one of them (and no exclusion pattern).
        """
        self.output_filename = output_filename
        self.src_folders = src_folders
        self.exclusion_patterns = exclusion_patterns
        self.inclusion_patterns = inclusion_patterns
        # Number of files emitted by the most recent generate_markdown() run.
        self.included_files = 0

    @staticmethod
    def _matches_any(filepath, patterns):
        """Return True if *filepath* matches at least one of *patterns*."""
        for pattern in patterns:
            if isinstance(pattern, str):
                if fnmatch.fnmatch(filepath, pattern):
                    return True
            elif isinstance(pattern, re.Pattern):
                if pattern.search(filepath):
                    return True
        return False

    def process_folder(self, folder):
        """Recursively render *folder* as Markdown and return the text.

        Entries are visited in name order so the output is reproducible.
        Directories matching an exclusion pattern are not descended into.
        Every emitted file increments ``self.included_files``.

        Raises:
            OSError: if *folder* or a selected file cannot be read.
        """
        # Close the scandir iterator promptly instead of leaving it to the GC,
        # and sort so the document does not depend on raw directory order.
        with os.scandir(folder) as scan:
            entries = sorted(scan, key=lambda entry: entry.name)

        sections = []
        for entry in entries:
            if self.is_excluded(entry.path):
                continue
            if entry.is_dir():
                sections.append(self.process_folder(entry.path))
            elif entry.is_file() and self.is_included(entry.path):
                filepath = entry.path
                ext = self.get_file_extension(filepath)
                # Decode explicitly as UTF-8 and replace undecodable bytes so
                # a single odd file cannot abort the whole run.
                with open(filepath, "r", encoding="utf-8", errors="replace") as f:
                    body = f.read()
                sections.append(
                    f"## File: {filepath}\n\n```{ext}\n{body}\n```\n\n"
                )
                self.included_files += 1
        return "".join(sections)

    def is_excluded(self, filepath):
        """Return True if *filepath* matches any exclusion pattern."""
        return self._matches_any(filepath, self.exclusion_patterns)

    def is_included(self, filepath):
        """Return True if *filepath* matches any inclusion pattern."""
        return self._matches_any(filepath, self.inclusion_patterns)

    def get_file_extension(self, filepath):
        """Return the extension of *filepath* without the leading dot.

        Returns an empty string when the path has no extension.
        """
        return os.path.splitext(filepath)[1][1:]

    def generate_markdown(self):
        """Process every source folder, write the Markdown file, print a summary.

        Raises:
            OSError: if a source folder cannot be read or the output file
                cannot be written.
        """
        # Start from zero so the reported count reflects this run only, even
        # when the method is called more than once on the same instance.
        self.included_files = 0
        all_content = "".join(
            self.process_folder(folder) for folder in self.src_folders
        )
        with open(self.output_filename, "w", encoding="utf-8") as f:
            f.write(all_content)
        print(f"Markdown file generated: {self.output_filename}")
        print(f"Processed folders: {self.src_folders}")
        print(f"Exclusion patterns: {self.exclusion_patterns}")
        print(f"Inclusion patterns: {self.inclusion_patterns}")
        print(f"Count files included: {self.included_files}")
if __name__ == "__main__":
    # Replace with desired values
    # output_filename = "codebase.md"
    output_filename = "codebase-to-replicate-for-problem-solver.md"
    src_folders = ["/Users/alanblount/Code/cai-platform/src/python"]
    # The regex accepts either a following "/" or end-of-path. Directory
    # entries are tested without a trailing slash, so requiring "/" alone
    # never matched the ".git" / "node_modules" directory itself and the
    # whole subtree was walked before each file inside was rejected.
    exclusion_patterns = ["*.pyc", re.compile(r"(\.git|node_modules)(/|$)")]
    inclusion_patterns = ["*.py"]
    converter = CodeToMarkdown(output_filename, src_folders, exclusion_patterns, inclusion_patterns)
    converter.generate_markdown()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment