Skip to content

Instantly share code, notes, and snippets.

@sulmanweb
Created November 29, 2024 11:42
Show Gist options
  • Save sulmanweb/ee1541b1739b06db6695370cbc8a480d to your computer and use it in GitHub Desktop.
Save sulmanweb/ee1541b1739b06db6695370cbc8a480d to your computer and use it in GitHub Desktop.
require 'fileutils'
require 'digest'
# Top-level entries (relative to the scanned directory) that are always
# excluded, regardless of what .gitignore says.
ALWAYS_IGNORE = %w[
  .git tmp log .ruby-lsp .github .devcontainer storage .annotaterb.yml public .cursorrules
].freeze

# File extensions (lower-case, including the leading dot) whose files are
# never included in the output.
IGNORED_EXTENSIONS = %w[
  .jpg .jpeg .png .gif .bmp .svg .webp .ico .pdf .tiff .raw
  .keep .gitkeep .sample .staging
].freeze

# Files larger than this many bytes are skipped entirely (1MB).
MAX_FILE_SIZE = 1_000_000

# Maximum number of characters of generated Markdown per output file (100KB).
CHUNK_SIZE = 100_000
# Loads the ignore patterns from the .gitignore at the root of a directory.
#
# Blank lines, whitespace-only lines and comment lines (starting with '#')
# are dropped, and surrounding whitespace is removed from each pattern so
# that stray trailing spaces or CRLF line endings do not end up inside a glob.
#
# @param directory_path [String] directory expected to contain a .gitignore
# @return [Array<String>] the usable patterns, in file order; empty when
#   there is no regular .gitignore file
def read_gitignore(directory_path)
  gitignore_path = File.join(directory_path, '.gitignore')
  # file? (rather than exist?) so a directory named .gitignore is not read.
  return [] unless File.file?(gitignore_path)

  File.readlines(gitignore_path, chomp: true)
      .map(&:strip)
      .reject { |line| line.empty? || line.start_with?('#') }
end
# Decides whether a file must be left out of the generated Markdown.
#
# A file is ignored when any of the following holds:
# * its path relative to +base_path+ is, or lives under, an ALWAYS_IGNORE entry;
# * its extension is listed in IGNORED_EXTENSIONS, or it is named '.keep';
# * it, or any directory above it, matches one of +ignore_patterns+.
#
# Patterns are matched with File.fnmatch? both as written and prefixed with
# '**/', so a leading '/' is not treated as a root anchor. A pattern ending
# in '/' is only compared against the file's ancestor directories, never
# against the file itself.
#
# @param path [String] path of the file, starting with "#{base_path}/"
# @param base_path [String] root directory the scan started from
# @param ignore_patterns [Array<String>] gitignore-style glob patterns
# @return [Boolean] true when the file should be skipped
def ignored?(path, base_path, ignore_patterns)
  relative_path = path.sub("#{base_path}/", '')
  return true if ALWAYS_IGNORE.any? { |dir| relative_path == dir || relative_path.start_with?("#{dir}/") }
  return true if IGNORED_EXTENSIONS.include?(File.extname(path).downcase) || File.basename(path) == '.keep'

  flags = File::FNM_PATHNAME | File::FNM_DOTMATCH
  segments = relative_path.split('/')
  # Callers only hand in files, so a pattern that names a directory can only
  # take effect if every directory above the file is tested as well.
  ancestors = (1...segments.size).map { |depth| segments.first(depth).join('/') }

  ignore_patterns.any? do |pattern|
    directory_only = pattern.end_with?('/')
    glob = pattern.sub(%r{/+\z}, '')
    next false if glob.empty?

    candidates = directory_only ? ancestors : ancestors + [relative_path]
    candidates.any? do |candidate|
      File.fnmatch?(glob, candidate, flags) || File.fnmatch?(File.join('**', glob), candidate, flags)
    end
  end
end
# Renders one file as a Markdown section: a level-2 heading with the file's
# base name followed by a fenced code block holding its stripped content.
#
# The fence language is the lower-cased extension without the dot, or 'text'
# when the file has no extension. If reading or rendering raises, the section
# body is replaced by a bracketed note carrying the error message.
#
# @param file_path [String] path of the file to render
# @return [String] the Markdown section, terminated by a blank line
def convert_to_markdown(file_path)
  suffix = File.extname(file_path).downcase
  fence_language = suffix.length > 1 ? suffix[1..] : 'text'

  begin
    body = File.read(file_path, encoding: 'UTF-8').strip
    [
      "## #{File.basename(file_path)}\n\n",
      "```#{fence_language}\n",
      "#{body}\n",
      "```\n\n"
    ].join
  rescue StandardError => e
    "## #{File.basename(file_path)}\n\n[File content not displayed: #{e.message}]\n\n"
  end
end
# Renders a nested hash as a Markdown bullet list.
#
# Every key becomes one "- key" line; when its value is a Hash the children
# are rendered right below it with the prefix extended by a single space.
#
# @param tree [Hash] entry names mapped to a nested Hash or any non-Hash leaf
# @param prefix [String] text placed before each bullet at this level
# @return [String] the rendered list, one entry per line ('' for an empty tree)
def generate_tree_markdown(tree, prefix = '')
  tree.each_with_object(+'') do |(label, children), lines|
    lines << "#{prefix}- #{label}\n"
    next unless children.is_a?(Hash)

    lines << generate_tree_markdown(children, prefix + ' ')
  end
end
# Splits +content+ into consecutive pieces of at most +chunk_size+ characters
# and writes each piece to its own file next to +output_file+.
#
# Files are named "<basename>_part<N><ext>" (N starting at 1) and begin with
# a small front-matter header stating the chunk number and the total number
# of chunks. The target directory is created when it does not exist yet.
# Nothing is written for empty content. The path of every file written is
# printed to standard output.
#
# @param output_file [String] path whose directory, base name and extension
#   are used to derive the chunk file names
# @param content [String] full text to split
# @param chunk_size [Integer] maximum characters per chunk (default CHUNK_SIZE)
# @return [nil]
# @raise [ArgumentError] if +chunk_size+ is not a positive Integer
def write_chunked_output(output_file, content, chunk_size: CHUNK_SIZE)
  unless chunk_size.is_a?(Integer) && chunk_size.positive?
    raise ArgumentError, 'chunk_size must be a positive Integer'
  end

  base_name = File.basename(output_file, '.*')
  extension = File.extname(output_file)
  dir_name = File.dirname(output_file)
  # Without this, File.open below fails with ENOENT for a fresh directory.
  FileUtils.mkdir_p(dir_name)

  # Integer ceiling division; computed once rather than per chunk.
  total_chunks = (content.length + chunk_size - 1) / chunk_size

  (0...content.length).step(chunk_size).each_with_index do |offset, index|
    chunk_index = index + 1
    chunk_file = File.join(dir_name, "#{base_name}_part#{chunk_index}#{extension}")
    File.open(chunk_file, 'w:UTF-8') do |file|
      file.write("---\n")
      file.write("chunk: #{chunk_index}\n")
      file.write("total_chunks: #{total_chunks}\n")
      file.write("---\n\n")
      file.write(content[offset, chunk_size])
    end
    puts "Markdown file created: #{chunk_file}"
  end

  nil
end
# Walks a directory and writes a Markdown digest of it: a bullet-list tree of
# the included files followed by the content of each file.
#
# Directories, '.'/'..' entries, paths rejected by ignored? and files larger
# than MAX_FILE_SIZE bytes are skipped. The assembled document is handed to
# write_chunked_output.
#
# @param directory_path [String] root directory to scan
# @param output_file [String] output path passed on to write_chunked_output
# @return whatever write_chunked_output returns
def process_directory(directory_path, output_file)
  patterns = read_gitignore(directory_path)
  tree = {}
  sections = []

  Dir.glob("#{directory_path}/**/*", File::FNM_DOTMATCH).each do |entry|
    skip = File.directory?(entry) ||
           ['.', '..'].include?(File.basename(entry)) ||
           ignored?(entry, directory_path, patterns) ||
           File.size(entry) > MAX_FILE_SIZE
    next if skip

    # Descend through (creating as needed) one hash per folder, then record
    # the file itself as a nil leaf.
    *folders, leaf = entry.sub("#{directory_path}/", '').split('/')
    parent = folders.inject(tree) { |node, folder| node[folder] ||= {} }
    parent[leaf] = nil unless leaf.nil?

    sections << convert_to_markdown(entry)
  end

  document = "---\nencoding: utf-8\n---\n\n# Project Structure\n\n" \
             "#{generate_tree_markdown(tree)}" \
             "\n# File Contents\n\n" \
             "#{sections.join("\n")}"
  write_chunked_output(output_file, document)
end
# Command-line entry point: expects exactly two arguments, the directory to
# scan and the output file path; otherwise prints usage and exits with
# status 1.
unless ARGV.length == 2
  puts "Usage: ruby script.rb <input_directory> <output_file>"
  exit 1
end

input_directory, output_file = ARGV
process_directory(input_directory, output_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment