Created
November 29, 2024 11:42
-
-
Save sulmanweb/ee1541b1739b06db6695370cbc8a480d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'digest'
require 'fileutils'
# Paths (relative to the scanned root) that are always skipped: an entry
# matches either the path itself or anything located underneath it.
ALWAYS_IGNORE = %w[
  .git tmp log .ruby-lsp .github .devcontainer storage .annotaterb.yml public .cursorrules
].freeze

# Lowercase file extensions (with the leading dot) that are never exported.
IGNORED_EXTENSIONS = %w[
  .jpg .jpeg .png .gif .bmp .svg .webp .ico .pdf .tiff .raw .keep .gitkeep .sample .staging
].freeze

# Files whose size in bytes exceeds this limit are skipped.
MAX_FILE_SIZE = 1_000_000 # 1MB

# Maximum number of characters of content written to each output chunk file.
CHUNK_SIZE = 100_000 # 100KB
# Reads the ignore patterns from the `.gitignore` file directly inside
# +directory_path+.
#
# Trailing whitespace is removed from every line; blank lines and lines that
# start with `#` (gitignore comments) are dropped so they are never treated
# as patterns.
#
# @param directory_path [String] directory expected to contain `.gitignore`
# @return [Array<String>] the remaining lines in file order; empty when
#   there is no regular `.gitignore` file
def read_gitignore(directory_path)
  gitignore_path = File.join(directory_path, '.gitignore')
  # File.file? (rather than exist?) avoids EISDIR if `.gitignore` is a directory.
  return [] unless File.file?(gitignore_path)

  File.readlines(gitignore_path, chomp: true)
      .map(&:rstrip)
      .reject { |line| line.empty? || line.start_with?('#') }
end
# Decides whether a file should be left out of the export.
#
# A path is ignored when any of the following holds:
# * it equals, or lies underneath, an entry of ALWAYS_IGNORE;
# * its extension, or its whole basename for dotfiles such as `.gitkeep`
#   (for which File.extname returns ''), is listed in IGNORED_EXTENSIONS;
# * it matches one of +ignore_patterns+, either directly, at any depth, or as
#   a descendant of a directory the pattern names.
#
# Patterns are matched with File.fnmatch?, so this is an approximation of
# gitignore semantics: leading/trailing slashes are stripped, and comment
# (`#`) and negation (`!`) lines are skipped rather than honoured.
#
# @param path [String] path of the file, beginning with +base_path+
# @param base_path [String] root directory of the scan
# @param ignore_patterns [Array<String>] gitignore-style patterns
# @return [Boolean] true when the file must be skipped
def ignored?(path, base_path, ignore_patterns)
  # delete_prefix only strips a leading match; sub could hit the middle of the path.
  relative_path = path.delete_prefix("#{base_path}/")
  return true if ALWAYS_IGNORE.any? { |dir| relative_path == dir || relative_path.start_with?("#{dir}/") }

  basename = File.basename(path).downcase
  return true if IGNORED_EXTENSIONS.include?(File.extname(path).downcase) || IGNORED_EXTENSIONS.include?(basename)

  flags = File::FNM_PATHNAME | File::FNM_DOTMATCH
  ignore_patterns.any? do |raw_pattern|
    pattern = raw_pattern.strip
    next false if pattern.empty? || pattern.start_with?('#', '!')

    # `/coverage` and `node_modules/` both name the entry `coverage` / `node_modules`.
    pattern = pattern.delete_prefix('/').chomp('/')
    next false if pattern.empty?

    candidates = [pattern, File.join('**', pattern)]
    # Also ignore everything below a matched directory.
    candidates += candidates.map { |candidate| File.join(candidate, '**', '*') }
    candidates.any? { |candidate| File.fnmatch?(candidate, relative_path, flags) }
  end
end
# Renders one file as a Markdown section: a `##` heading with the file's
# basename followed by its content in a fenced code block.
#
# The fence language is the lowercase extension without the dot, or `text`
# when the file has no extension. The fence is made longer than the longest
# run of backticks in the content so that embedded fences (e.g. in Markdown
# files) cannot terminate the block early.
#
# @param file_path [String] path of the file to render
# @return [String] the Markdown section; when the file cannot be read or is
#   not valid UTF-8, a section containing a "[File content not displayed: ...]"
#   notice instead of the content
def convert_to_markdown(file_path)
  heading = "## #{File.basename(file_path)}\n\n"
  language = File.extname(file_path).downcase.delete_prefix('.')
  language = 'text' if language.empty?

  content = File.read(file_path, encoding: 'UTF-8')
  # Binary data: report it explicitly instead of relying on String#strip raising.
  return "#{heading}[File content not displayed: invalid byte sequence in UTF-8]\n\n" unless content.valid_encoding?

  body = content.strip
  longest_backtick_run = body.scan(/`+/).map(&:length).max || 0
  fence = '`' * [3, longest_backtick_run + 1].max
  "#{heading}#{fence}#{language}\n#{body}\n#{fence}\n\n"
rescue StandardError => e
  "## #{File.basename(file_path)}\n\n[File content not displayed: #{e.message}]\n\n"
end
# Renders a nested hash as a nested Markdown bullet list.
#
# Each key becomes a `- key` line; when its value is a Hash, the children are
# rendered beneath it with two extra spaces of indentation, which is what
# Markdown requires for an item to nest under a `- ` bullet (a single space
# leaves the child at the same list level).
#
# @param tree [Hash] names mapped to a nested Hash (directory) or any other
#   value such as nil (file)
# @param prefix [String] indentation prepended to every line at this level
# @return [String] the rendered list; '' for an empty tree
def generate_tree_markdown(tree, prefix = '')
  tree.map do |name, children|
    line = "#{prefix}- #{name}\n"
    children.is_a?(Hash) ? line + generate_tree_markdown(children, "#{prefix}  ") : line
  end.join
end
# Splits +content+ into pieces of at most +chunk_size+ characters and writes
# each piece to its own file next to +output_file+.
#
# For an output file `dir/name.ext` the chunks are written to
# `dir/name_part1.ext`, `dir/name_part2.ext`, ... Every chunk file starts
# with a small front-matter header giving its index and the total number of
# chunks. The output directory is created when it does not exist yet. A
# "Markdown file created" line is printed for each file written.
#
# @param output_file [String] path used to derive the chunk file names
# @param content [String] full text to split
# @param chunk_size [Integer] maximum characters per chunk (default CHUNK_SIZE)
# @return [Array<String>] paths of the chunk files, in order; empty when
#   +content+ is empty (no file is written in that case)
# @raise [ArgumentError] if +chunk_size+ is not a positive Integer
def write_chunked_output(output_file, content, chunk_size: CHUNK_SIZE)
  unless chunk_size.is_a?(Integer) && chunk_size.positive?
    raise ArgumentError, "chunk_size must be a positive Integer, got #{chunk_size.inspect}"
  end

  base_name = File.basename(output_file, '.*')
  extension = File.extname(output_file)
  dir_name = File.dirname(output_file)
  FileUtils.mkdir_p(dir_name)

  # Integer ceiling division, computed once instead of per chunk.
  total_chunks = (content.length + chunk_size - 1) / chunk_size

  (1..total_chunks).map do |chunk_index|
    chunk_file = File.join(dir_name, "#{base_name}_part#{chunk_index}#{extension}")
    File.open(chunk_file, 'w:UTF-8') do |file|
      file.write("---\n")
      file.write("chunk: #{chunk_index}\n")
      file.write("total_chunks: #{total_chunks}\n")
      file.write("---\n\n")
      file.write(content[(chunk_index - 1) * chunk_size, chunk_size])
    end
    puts "Markdown file created: #{chunk_file}"
    chunk_file
  end
end
# Records one file in the nested tree hash: every leading path component
# becomes (or reuses) a nested Hash and the final component maps to nil.
def add_to_file_tree(tree, parts)
  *directories, file_name = parts
  parent = directories.reduce(tree) { |node, dir| node[dir] ||= {} }
  parent[file_name] = nil
end

# True when +file_path+ is a chunk file (`<base>_partN<ext>`) located in the
# directory the export itself writes to.
def generated_chunk?(file_path, output_dir, chunk_name)
  File.basename(file_path).match?(chunk_name) && File.dirname(File.expand_path(file_path)) == output_dir
end

# Exports a directory as chunked Markdown: a "Project Structure" bullet tree
# followed by a "File Contents" section with one fenced block per file.
#
# Files are visited in sorted path order. Skipped are: anything that is not
# a regular file (directories, broken symlinks, sockets, ...), paths rejected
# by ignored? using the patterns from the directory's `.gitignore`, chunk
# files this export wrote on an earlier run, and files larger than
# MAX_FILE_SIZE bytes.
#
# @param directory_path [String] root directory to export
# @param output_file [String] path from which chunk file names are derived
# @return whatever write_chunked_output returns
def process_directory(directory_path, output_file)
  ignore_patterns = read_gitignore(directory_path)
  output_dir = File.expand_path(File.dirname(output_file))
  chunk_name = /\A#{Regexp.escape(File.basename(output_file, '.*'))}_part\d+#{Regexp.escape(File.extname(output_file))}\z/

  file_contents = []
  file_tree = {}

  Dir.glob("#{directory_path}/**/*", File::FNM_DOTMATCH).sort.each do |file_path|
    # File.file? is false for directories and broken symlinks, on which
    # File.size would otherwise raise Errno::ENOENT.
    next unless File.file?(file_path)
    next if ignored?(file_path, directory_path, ignore_patterns)
    next if generated_chunk?(file_path, output_dir, chunk_name)
    next if File.size(file_path) > MAX_FILE_SIZE

    relative_path = file_path.delete_prefix("#{directory_path}/")
    add_to_file_tree(file_tree, relative_path.split('/'))
    file_contents << convert_to_markdown(file_path)
  end

  markdown_content = [
    "---\nencoding: utf-8\n---\n\n# Project Structure\n\n",
    generate_tree_markdown(file_tree),
    "\n# File Contents\n\n",
    file_contents.join("\n")
  ].join
  write_chunked_output(output_file, markdown_content)
end
# Command-line entry point. Runs only when this file is executed directly,
# so the methods above can be loaded from another file without side effects.
if __FILE__ == $PROGRAM_NAME
  if ARGV.length != 2
    # Usage errors go to stderr so they are not mixed with normal output.
    warn 'Usage: ruby script.rb <input_directory> <output_file>'
    exit 1
  end

  input_directory, output_file = ARGV
  # Fail early instead of silently writing an export with no files in it.
  abort "Input directory not found: #{input_directory}" unless File.directory?(input_directory)

  process_directory(input_directory, output_file)
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment