This script generates Jekyll wiki pages from JSON data files exported from DeepWiki-Open-generated wikis.

#!/usr/bin/env ruby
# frozen_string_literal: true
# This script generates Jekyll wiki pages from JSON data files (exported from
# DeepWiki-Open-generated wikis) located in `_data/wikis`.
# It handles slug generation, pagination, repository link transformation, and
# cross-linking between wiki pages.
#
# @author b08x
# @version 1.1.0
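#
# Usage (a sketch): run from the Jekyll site root, since all paths are resolved
# relative to Dir.pwd, e.g.:
#
#   ruby generate_wiki_pages.rb
#
# (The filename above is illustrative; invoke whatever this file is saved as.)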
require 'jekyll'
require 'fileutils'
require 'json'
require 'yaml'
# Configuration constants
ITEMS_PER_PAGE = 12
RESERVED_SLUGS = ['page'].freeze
# Converts a string into a URL-friendly slug.
#
# @param string [String] the string to slugify
# @return [String] the slugified string
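# @example (illustrative; output assumes Jekyll's default slugify mode)
#   slugify("Getting Started") #=> "getting-started"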
def slugify(string)
  Jekyll::Utils.slugify(string)
end

# Validates a slug against a list of reserved keywords.
# If the slug is reserved, it appends '-content' to avoid conflicts.
#
# @param slug [String] the slug to validate
# @return [String] the validated (and possibly modified) slug
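# @example
#   validate_slug("page")         #=> "page-content" (also prints a warning)
#   validate_slug("introduction") #=> "introduction"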
def validate_slug(slug)
  if RESERVED_SLUGS.include?(slug)
    puts "Warning: Page slug '#{slug}' conflicts with reserved keyword. Using '#{slug}-content' instead."
    return "#{slug}-content"
  end
  slug
end

# Extracts a text excerpt from content by stripping HTML and Markdown tags.
#
# @param content [String] the raw content string
# @param length [Integer] the approximate length of the excerpt
# @return [String] the cleaned and truncated excerpt
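# @example (illustrative)
#   extract_excerpt("## Overview\n\nThis is **bold** text.") #=> "Overview This is bold text."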
def extract_excerpt(content, length = 200)
  return "" if content.nil? || content.empty?

  # Strip HTML tags
  stripped = content.gsub(/<\/?[^>]*>/, "")

  # Remove markdown syntax
  stripped = stripped.gsub(/```[a-z]*!?\n.*?\n```/m, "") # Code blocks
  stripped = stripped.gsub(/[*_`#\[\]()]/, "")           # Inline formatting
  stripped = stripped.gsub(/^\s*[-+*]\s+/, "")           # List markers
  stripped = stripped.gsub(/^\s*\d+\.\s+/, "")           # Numbered lists

  # Collapse whitespace
  stripped = stripped.gsub(/\s+/, " ").strip

  # Truncate to length
  if stripped.length > length
    stripped[0...length].gsub(/\s+\S*$/, "") + "..."
  else
    stripped
  end
end

# Slices a list of pages into segments for pagination.
#
# @param pages [Array<Hash>] the collection of page data
# @param per_page [Integer] number of items per page
# @return [Array<Array<Hash>>] an array of page segments
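# @example
#   calculate_pagination_segments([p1, p2, p3], 2) #=> [[p1, p2], [p3]]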
def calculate_pagination_segments(pages, per_page)
  return [] if pages.nil? || pages.empty?
  pages.each_slice(per_page).to_a
end

# Builds pagination metadata for a specific index page.
#
# @param page_num [Integer] the current page number
# @param total_pages [Integer] the total number of pages
# @param wiki_id [String] the identifier for the wiki
# @return [Hash] the pagination metadata
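# @example (illustrative)
#   build_pagination_metadata(2, 3, 'my-wiki')
#   #=> { 'enabled' => true, 'current_page' => 2, 'total_pages' => 3, 'per_page' => 12,
#   #     'has_previous' => true, 'has_next' => true,
#   #     'previous_page_url' => '/wikis/my-wiki/', 'next_page_url' => '/wikis/my-wiki/page/3/' }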
def build_pagination_metadata(page_num, total_pages, wiki_id)
  has_previous = page_num > 1
  has_next = page_num < total_pages

  previous_page_url = if has_previous
                        page_num == 2 ? "/wikis/#{wiki_id}/" : "/wikis/#{wiki_id}/page/#{page_num - 1}/"
                      else
                        nil
                      end
  next_page_url = if has_next
                    "/wikis/#{wiki_id}/page/#{page_num + 1}/"
                  else
                    nil
                  end

  {
    'enabled' => true,
    'current_page' => page_num,
    'total_pages' => total_pages,
    'per_page' => ITEMS_PER_PAGE,
    'has_previous' => has_previous,
    'has_next' => has_next,
    'previous_page_url' => previous_page_url,
    'next_page_url' => next_page_url
  }
end

# Generates an individual Markdown page for a wiki entry.
#
# @param source_dir [String] the root directory of the site
# @param wiki_id [String] the identifier for the wiki
# @param page_data [Hash] the data for the specific page
# @param prev_page [Hash, nil] the data for the previous page
# @param next_page [Hash, nil] the data for the next page
# @param permalink_map [Hash] map of page IDs to their calculated permalinks and slugs
# @param repo_url [String, nil] the base URL for the repository
# @return [void]
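# @example Output location (illustrative; note the filename joins number and slug
#   with an underscore while the permalink uses a hyphen)
#   wikis/<wiki_id>/01_getting-started.md  => permalink /wikis/<wiki_id>/01-getting-started/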
def generate_page(source_dir, wiki_id, page_data, prev_page, next_page, permalink_map, repo_url)
  page_info = permalink_map[page_data['id']]
  slug = page_info['slug']
  num = page_info['number']
  permalink = page_info['permalink']
  title = page_data['title']
  content_body = page_data['content'] || ""

  # Transformations for jekyll-spaceship
  content_body = content_body.gsub(/```mermaid\b/, '```mermaid!')
  content_body = content_body.gsub(/```plantuml\b/, '```plantuml!')

  # Prepend repository address to inline source links like [path.py:L10-L20]()
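  # e.g. "[app/main.py:L10-L20]()" becomes
  #      "[app/main.py:L10-L20](<repo_url>/blob/main/app/main.py#L10-L20)"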
  if repo_url
    clean_repo_url = repo_url.chomp('/')
    content_body = content_body.gsub(/\[([^\]]+\.(?:py|md|sh|rb|js|ts|tsx|json|html|yml)(?::#?L[^\]]*)?)\]\(\)/) do |_match|
      path_and_line = Regexp.last_match(1)
      # Split path and line if present (e.g., path.py:#L10-L20 or path.py:L10-L20)
      path, line = path_and_line.split(':', 2)
      if line
        # GitHub uses #L1-L2 format. Ensure it starts with #L
        clean_line = line.gsub(/^#?L?/, '#L')
        full_path = "#{path}#{clean_line}"
      else
        full_path = path
      end
      "[#{path_and_line}](#{clean_repo_url}/blob/main/#{full_path})"
    end
  end
  # Ensure a blank line after headers if missing
  content_body = content_body.gsub(/^(#+ .*)(\n)([^#\n])/, "\\1\n\n\\3")
  # Ensure blank lines before tables only if not already part of a table
  content_body = content_body.gsub(/([^\n|])\n\|/, "\\1\n\n|")

  dir = File.join('wikis', wiki_id)
  filename = "#{num}_#{slug}.md"
  path = File.join(source_dir, dir, filename)

  # Ensure the directory exists
  FileUtils.mkdir_p(File.join(source_dir, dir))

  # Metadata
  related_pages_raw = page_data['relatedPages'] || []
  related_pages = related_pages_raw.map do |rp_id|
    info = permalink_map[rp_id]
    if info
      { 'id' => rp_id, 'url' => info['permalink'], 'title' => rp_id.split('-').map(&:capitalize).join(' ') }
    else
      { 'id' => rp_id }
    end
  end

  # Prepend repository address to source file links
  file_paths_raw = page_data['filePaths'] || []
  file_paths = file_paths_raw.map do |fp|
    if repo_url && !fp.empty?
      # Assuming GitHub format for now: repo_url/blob/main/path
      # We check if it's already a full URL just in case
      if fp.start_with?('http')
        { 'path' => fp, 'url' => fp }
      else
        clean_repo_url = repo_url.chomp('/')
        full_url = "#{clean_repo_url}/blob/main/#{fp}"
        { 'path' => fp, 'url' => full_url }
      end
    else
      { 'path' => fp }
    end
  end

  # Pagination metadata for individual page
  pagination_data = {}
  if prev_page
    prev_info = permalink_map[prev_page['id']]
    pagination_data['previous'] = {
      'title' => "#{prev_info['number']}-#{prev_info['slug']}",
      'url' => prev_info['permalink']
    }
  end
  if next_page
    next_info = permalink_map[next_page['id']]
    pagination_data['next'] = {
      'title' => "#{next_info['number']}-#{next_info['slug']}",
      'url' => next_info['permalink']
    }
  end

  # Front matter content
  front_matter = {
    'layout' => 'wiki-page',
    'title' => title,
    'wiki_id' => wiki_id,
    'page_id' => page_data['id'],
    'permalink' => permalink,
    'repository' => repo_url,
    'left_sidebar' => 'wiki-nav',
    'right_sidebar' => 'toc',
    'right_sidebar_xl_only' => true,
    'show_metadata' => false,
    'show_graph' => true,
    'related_pages' => related_pages,
    'file_paths' => file_paths,
    'pagination' => pagination_data
  }

  # Combine front matter and content
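  # Note: Hash#to_yaml already emits the leading "---" document marker, so the
  # "---" appended below closes the Jekyll front matter block.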
  file_content = "#{front_matter.to_yaml}---\n\n#{content_body}"

  # Write the file
  File.write(path, file_content)
end

# Writes a paginated index page for the wiki.
#
# @param source_dir [String] the root directory of the site
# @param wiki_id [String] the identifier for the wiki
# @param segment [Array<Hash>] the list of pages for this index page
# @param page_num [Integer] the current page number
# @param total_pages [Integer] the total number of pages
# @param wiki_metadata [Hash] metadata for the entire wiki
# @param permalink_map [Hash] map of page IDs to calculated path info
# @return [void]
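# @example Generated index layout for a wiki with three index pages
#   wikis/<wiki_id>/index.md          (page 1, permalink /wikis/<wiki_id>/)
#   wikis/<wiki_id>/page/2/index.md   (permalink /wikis/<wiki_id>/page/2/)
#   wikis/<wiki_id>/page/3/index.md   (permalink /wikis/<wiki_id>/page/3/)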
def write_paginated_page(source_dir, wiki_id, segment, page_num, total_pages, wiki_metadata, permalink_map)
  # Determine directory path
  if page_num == 1
    dir = File.join('wikis', wiki_id)
    permalink = "/wikis/#{wiki_id}/"
  else
    dir = File.join('wikis', wiki_id, 'page', page_num.to_s)
    permalink = "/wikis/#{wiki_id}/page/#{page_num}/"
  end
  filename = 'index.md'
  path = File.join(source_dir, dir, filename)

  # Ensure the directory exists
  FileUtils.mkdir_p(File.join(source_dir, dir))

  # Build pagination metadata
  pagination_meta = build_pagination_metadata(page_num, total_pages, wiki_id)

  page_items = segment.map do |page_data|
    page_id = page_data['id']
    page_info = permalink_map[page_id]
    {
      'id' => page_id,
      'title' => page_data['title'],
      'number' => page_info['number'],
      'slug' => page_info['slug'],
      'importance' => page_data['importance'] || 'medium',
      'excerpt' => extract_excerpt(page_data['content']),
      'url' => page_info['permalink']
    }
  end

  # Build complete front matter
  front_matter = {
    'layout' => 'wiki',
    'title' => "#{wiki_id.split('-').map(&:capitalize).join(' ')} Wiki",
    'wiki_id' => wiki_id,
    'permalink' => permalink,
    'pagination' => pagination_meta,
    'show_graph' => true,
    'repository' => wiki_metadata['repository'],
    'generated_at' => wiki_metadata['generated_at'],
    'total_wiki_pages' => wiki_metadata['page_count'],
    'pages' => page_items
  }

  # Generate content (minimal for index pages)
  content_body = "<!-- Paginated wiki index: page #{page_num} of #{total_pages} -->\n"

  # Combine front matter and content
  file_content = "#{front_matter.to_yaml}---\n\n#{content_body}"

  # Write the file
  File.write(path, file_content)
end

# Removes old directory-based structures to ensure the new flat structure is clean.
#
# @param source_dir [String] the root directory of the site
# @param wiki_id [String] the identifier for the wiki
# @param slugs [Array<String>] list of slugs to check for old directories
# @return [void]
def cleanup_old_directories(source_dir, wiki_id, slugs)
  wiki_dir = File.join(source_dir, 'wikis', wiki_id)
  return unless Dir.exist?(wiki_dir)

  slugs.each do |slug|
    slug_dir = File.join(wiki_dir, slug)
    if Dir.exist?(slug_dir)
      puts "Cleaning up old directory: #{slug_dir}"
      FileUtils.rm_rf(slug_dir)
    end
  end
end

# Main execution entry point. Loads all JSON wiki definitions and triggers generation.
#
# @return [void]
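#
# Expected JSON shape (a minimal sketch inferred from the keys this script reads;
# field names follow the DeepWiki-Open export, values here are illustrative):
#
#   {
#     "metadata": { "repository": "https://github.com/user/repo",
#                   "generated_at": "2026-01-01", "page_count": 2 },
#     "pages": [
#       { "id": "getting-started", "title": "Getting Started", "content": "...",
#         "importance": "high", "relatedPages": ["architecture"],
#         "filePaths": ["README.md"] }
#     ]
#   }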
def main
  source_dir = Dir.pwd
  data_dir = File.join(source_dir, '_data', 'wikis')

  unless Dir.exist?(data_dir)
    puts "Error: Data directory not found at #{data_dir}"
    exit 1
  end

  Dir.glob(File.join(data_dir, '*.json')).each do |json_file|
    wiki_id = File.basename(json_file, '.json')
    wiki_data = JSON.parse(File.read(json_file))
    next unless wiki_data['pages']

    puts "Processing wiki: #{wiki_id}"

    # Pre-calculate permalinks for all pages in this wiki
    pages = wiki_data['pages']
    permalink_map = {}
    pages.each_with_index do |page_data, index|
      page_id = page_data['id']
      slug = validate_slug(slugify(page_id))
      num = (index + 1).to_s.rjust(2, '0')
      permalink_info = {
        'slug' => slug,
        'number' => num,
        'permalink' => "/wikis/#{wiki_id}/#{num}-#{slug}/"
      }
      permalink_map[page_id] = permalink_info

      # Enrich the original data for use in layouts (not strictly needed but kept for parity)
      page_data['number'] = num
      page_data['slug'] = slug
      page_data['generated_permalink'] = permalink_info['permalink']
    end

    # Metadata for the wiki
    repo_url = wiki_data.dig('metadata', 'repository')

    # Generate individual wiki pages
    pages.each_with_index do |page_data, index|
      prev_page = index > 0 ? pages[index - 1] : nil
      next_page = index < pages.length - 1 ? pages[index + 1] : nil
      generate_page(source_dir, wiki_id, page_data, prev_page, next_page, permalink_map, repo_url)
    end

    # Generate paginated index pages
    segments = calculate_pagination_segments(pages, ITEMS_PER_PAGE)
    total_pages = segments.length
    metadata = wiki_data['metadata'] || {}
    segments.each_with_index do |segment, index|
      page_num = index + 1
      write_paginated_page(source_dir, wiki_id, segment, page_num, total_pages, metadata, permalink_map)
    end

    # Cleanup old structure if it exists
    cleanup_old_directories(source_dir, wiki_id, permalink_map.values.map { |v| v['slug'] })
  end

  puts "Wiki generation complete."
end

if __FILE__ == $0
  main
end