@barelyknown
Created May 11, 2025 00:26
db_structure_splitter.rake
require "fileutils"
require "active_support/core_ext/string/inflections"
namespace :db do
# db:split_structure
# ------------------
# This Rake task processes the `db/structure.sql` file generated by Rails (typically from `pg_dump`).
# Its primary goal is to split this monolithic SQL file into a more manageable, organized
# set of smaller SQL files. The organization is primarily table-centric, meaning that DDL
# statements related to a specific table (CREATE TABLE, ALTER TABLE, CREATE INDEX, CREATE TRIGGER,
# associated functions, comments) are grouped into a single file for that table.
#
# Output Structure:
# db/structure_split/
# ├── tables/                  # Contains one .sql file per table, view, or materialized view
# │   ├── public.users.sql
# │   ├── public.posts.sql
# │   └── ...
# ├── _global/                 # Contains DDL for objects not specific to a single table
# │   ├── _extensions.sql
# │   ├── _schemas.sql
# │   ├── _types.sql           # Global ENUMs and other custom types
# │   ├── _functions.sql       # Functions not clearly associated with a single table
# │   └── _sequences.sql       # Sequences not OWNED BY a table column
# ├── misc/                    # Contains DDL that couldn't be classified
# │   └── _misc.sql
# ├── _preamble.sql            # Initial SET commands and other setup from structure.sql
# ├── _schema_migrations.sql   # INSERT statements for the schema_migrations table
# └── _postamble.sql           # Final SET search_path command
#
# Processing Logic:
# 1. Pre-extraction:
#    - Extracts `INSERT INTO "schema_migrations"` statements.
#    - Extracts the final `SET search_path` command.
#    - Extracts the initial "preamble" (SET commands before the first object definition).
# 2. Main Parsing Loop:
#    - Iterates through `structure.sql`, identifying DDL objects based on `pg_dump`'s
#      `-- Name: ...; Type: ...; Schema: ...;` comment headers.
#    - Routes DDL for SCHEMAs, EXTENSIONs, TYPEs, TABLEs, VIEWs, MATERIALIZED VIEWs,
#      SEQUENCEs (owned by tables), INDEXes, TRIGGERs, DEFAULTs, and CONSTRAINTs to
#      their respective files or the `misc` file.
#    - Stores TABLE, VIEW, and MATERIALIZED VIEW information in `known_tables_map` for later association.
#    - Stores output paths for most objects in `object_output_paths`.
#    - Defers processing of FUNCTIONs and COMMENTs ON FUNCTIONs.
# 3. Deferred Processing:
#    - Processes all deferred FUNCTIONs:
#      - Uses heuristics (name matching, parameter type matching, body content scanning)
#        to associate functions with one or more tables.
#      - Writes function DDL to associated table files or to `_global/_functions.sql`.
#    - Processes all deferred COMMENTs ON FUNCTIONs, writing them to the same file(s)
#      as their target functions.
#
# Key Data Structures:
# - `known_tables_map`: Maps qualified table/view names to their output file paths.
# - `object_output_paths`: Maps qualified object names to their type and an array of output file paths.
# - `deferred_functions`: Stores function DDL and metadata for processing after all tables are known.
# - `deferred_comments_on_functions`: Stores comment DDL for functions, processed after functions.
#
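# Usage (assuming this file is placed under lib/tasks/ so Rake loads it):
#   bin/rails db:split_structure
#
# Re-running is safe: the task deletes any existing db/structure_split/ directory
# before writing the new split files.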
desc "Splits the db/structure.sql file into per-table files and other categories, with a table-centric organization."
task split_structure: :environment do
structure_file_path = Rails.root.join("db", "structure.sql")
output_base_dir = Rails.root.join("db", "structure_split")
unless File.exist?(structure_file_path)
warn "Error: #{structure_file_path} not found."
exit 1
end
puts "Cleaning up and creating output directory: #{output_base_dir}"
FileUtils.rm_rf(output_base_dir)
# Create base output directories
FileUtils.mkdir_p(output_base_dir.join("tables"))
FileUtils.mkdir_p(output_base_dir.join("_global"))
FileUtils.mkdir_p(output_base_dir.join("misc"))
sql_content = File.read(structure_file_path)
# --- Phase 1: Extract schema_migrations and final SET search_path ---
# These are special cases that are handled separately from the main DDL object parsing.
schema_migrations_content = ""
final_set_search_path_content = ""
# Regex to capture all INSERT INTO "schema_migrations" statements.
# These are typically grouped together but might be interspersed with comments.
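# For reference, recent Rails versions emit this as a single multi-row statement
# (illustrative; exact formatting varies by Rails version), e.g.:
#   INSERT INTO "schema_migrations" (version) VALUES
#   ('20240101000000'),
#   ('20240102000000');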
schema_migrations_regex = /^(INSERT INTO "schema_migrations" .*?;(?:\s*--.*?\n|\s*\n)*)+/m
if (migrations_match = sql_content.match(schema_migrations_regex))
# Use the full match here: with the repeated capture group, $1 would only hold the last INSERT statement.
schema_migrations_content = migrations_match[0].strip
sql_content = sql_content.sub(migrations_match[0], "") # Remove from main content for further processing
end
# Extract the final "SET search_path = ..." line, usually at the very end of the file.
if sql_content =~ /(\nSET search_path = .*?;\s*)$/m
final_set_search_path_content = $1.strip
sql_content = sql_content.sub(final_set_search_path_content, "").strip # Remove and clean up
end
# --- Phase 2: Process the rest of the content ---
# Open file handles for global DDL categories. These files will accumulate DDL
# that isn't specific to a single table or is explicitly global (like extensions).
file_handles = {
preamble: File.open(output_base_dir.join("_preamble.sql"), "w"),
extensions: File.open(output_base_dir.join("_global", "_extensions.sql"), "w"),
schemas: File.open(output_base_dir.join("_global", "_schemas.sql"), "w"),
global_types: File.open(output_base_dir.join("_global", "_types.sql"), "w"),
global_sequences: File.open(output_base_dir.join("_global", "_sequences.sql"), "w"),
global_functions: File.open(output_base_dir.join("_global", "_functions.sql"), "w"),
misc: File.open(output_base_dir.join("misc", "_misc.sql"), "w") # Catch-all for unclassified DDL
}
# `known_tables_map`: Stores "schema.table_simple_name" => Pathname object of the table's SQL file.
# Used to quickly find where to write DDL related to a known table (e.g., indexes, triggers, comments).
known_tables_map = {}
# `object_output_paths`: Stores qualified object names (e.g., "public.my_type", "public.my_function")
# as keys. Values are hashes like { type: "TYPE", path: ["/path/to/type_file.sql"] }.
# The `path` is an array because functions can be written to multiple table files.
# This map is crucial for routing comments to the correct file(s) where the commented object was written.
object_output_paths = {}
# `deferred_functions` and `deferred_comments_on_functions`:
# Functions and comments on functions are processed *after* the main loop.
# This is because function-to-table association heuristics rely on `known_tables_map` being fully populated,
# and comments on functions rely on `object_output_paths` for functions being populated.
deferred_functions = []
deferred_comments_on_functions = []
# Extract the "preamble": SQL commands (mostly SET statements) at the beginning of
# structure.sql, before the first DDL object definition.
current_pos = 0
first_header_match = sql_content.match(/^--\n-- Name:/m) # Standard pg_dump object header
if first_header_match
preamble_text = sql_content[0...first_header_match.begin(0)].strip
file_handles[:preamble].puts(preamble_text + "\n\n") unless preamble_text.empty?
current_pos = first_header_match.begin(0) # Start main parsing from here
else
# If no standard headers are found, assume the whole remaining content is preamble (unlikely for valid structure.sql)
file_handles[:preamble].puts(sql_content.strip + "\n\n") unless sql_content.strip.empty?
sql_content = "" # No more content to parse
end
# Main parsing loop: Iterates over DDL objects identified by pg_dump's comment headers.
# Regex captures:
# 1. object_name_with_args: e.g., "users", "my_function(integer, text)"
# 2. object_type: e.g., "TABLE", "FUNCTION", "INDEX"
# 3. schema_name_raw: e.g., "public", "-" (for objects not in a schema, like extensions)
# 4. ddl_and_comments: The actual SQL DDL statement(s) for the object.
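# An example object block as emitted by pg_dump (illustrative):
#   --
#   -- Name: users; Type: TABLE; Schema: public; Owner: -
#   --
#
#   CREATE TABLE public.users (
#       id bigint NOT NULL
#   );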
sql_content[current_pos..].scan(
/^--\n-- Name: (.*?); Type: (.*?); Schema: (.*?);(?: Owner:.*?)?\n--\n([\s\S]*?)(?=(?:^--\n-- Name:|\z))/m
).each do |object_name_with_args, object_type, schema_name_raw, ddl_and_comments|
# Reconstruct the full block text including the header for writing to files.
full_block_text = "--\n-- Name: #{object_name_with_args}; Type: #{object_type}; Schema: #{schema_name_raw}; Owner: -\n--\n#{ddl_and_comments.strip}"
# Normalize names: remove quotes, extract simple name from name with arguments.
object_name_simple = object_name_with_args.split("(").first.strip.delete('"')
schema_name = if schema_name_raw.nil? || schema_name_raw.strip == "-" || schema_name_raw.strip.empty?
"public" # Default to 'public' if schema is not specified or is '-'
else
schema_name_raw.strip.delete('"')
end
qualified_object_name_simple = "#{schema_name}.#{object_name_simple}"
# Sanitize names for use in filenames (replace non-alphanumeric characters).
sanitized_schema_name = schema_name.gsub(/[^a-zA-Z0-9_.-]+/, "_")
sanitized_object_name_simple = object_name_simple.gsub(/[^a-zA-Z0-9_.-]+/, "_")
case object_type
when "SCHEMA"
file_handles[:schemas].puts(full_block_text + "\n\n")
# Schemas are global, not typically associated with a single table.
when "EXTENSION"
file_handles[:extensions].puts(full_block_text + "\n\n")
# Extensions are global.
# Record its path for potential comments ON EXTENSION.
object_output_paths[qualified_object_name_simple] = {type: "EXTENSION", path: [file_handles[:extensions].path.to_s]}
when "TYPE"
# Custom types (ENUMs, etc.) are generally written to a global types file.
# Associating them reliably with specific tables can be complex.
output_file_path = file_handles[:global_types].path
file_handles[:global_types].puts(full_block_text + "\n\n")
object_output_paths[qualified_object_name_simple] = {type: "TYPE", path: [output_file_path.to_s]}
when "TABLE", "VIEW", "MATERIALIZED VIEW"
# Tables, Views, and Materialized Views each get their own file in the 'tables/' directory.
# They are central to the organization.
table_file_name = "#{sanitized_schema_name}.#{sanitized_object_name_simple}.sql"
output_file_path = output_base_dir.join("tables", table_file_name)
File.open(output_file_path, "a") { |f| f.puts(full_block_text + "\n\n") }
# Add to known_tables_map for associating other DDL (indexes, triggers, functions, comments).
known_tables_map[qualified_object_name_simple] = output_file_path
# Record its path for comments ON TABLE/VIEW.
object_output_paths[qualified_object_name_simple] = {type: object_type, path: [output_file_path.to_s]}
when "SEQUENCE"
# Sequences can be "OWNED BY" a table column, or be global.
output_file_path = nil # Initialize
owned_by_match = ddl_and_comments.match(/OWNED BY\s+((?:[\w"-]+\.)?[\w"-]+)\.([\w"-]+);/m)
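# Matches e.g. "ALTER SEQUENCE public.users_id_seq OWNED BY public.users.id;",
# capturing the owning table reference ("public.users") and the column ("id").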
if owned_by_match
# If OWNED BY a table, try to write it to that table's file.
owner_table_ref, _owner_column = owned_by_match.captures
owner_schema = owner_table_ref.include?(".") ? owner_table_ref.split(".").first.delete('"') : schema_name
owner_table_simple = owner_table_ref.split(".").last.delete('"')
qualified_owner = "#{owner_schema}.#{owner_table_simple}"
if known_tables_map[qualified_owner]
output_file_path = known_tables_map[qualified_owner]
File.open(output_file_path, "a") { |f| f.puts(full_block_text + "\n\n") }
else
# If owning table not found (should be rare if structure.sql is ordered), put in global sequences.
output_file_path = file_handles[:global_sequences].path
file_handles[:global_sequences].puts(full_block_text + "\n\n")
end
else
# If not OWNED BY, it's a global sequence.
output_file_path = file_handles[:global_sequences].path
file_handles[:global_sequences].puts(full_block_text + "\n\n")
end
object_output_paths[qualified_object_name_simple] = {type: "SEQUENCE", path: [output_file_path.to_s]}
when "INDEX", "TRIGGER", "DEFAULT", "CONSTRAINT"
# These objects are typically ALTER TABLE statements or CREATE INDEX/TRIGGER ON a table.
# They are associated with a specific table.
table_ref = nil # Will store the qualified name of the target table.
# Extract target table name from DDL.
match = if object_type == "INDEX" || object_type == "TRIGGER"
# For CREATE INDEX ... ON table_name or CREATE TRIGGER ... ON table_name
# The regex specifically looks for " ON " (case-insensitive) followed by the table name.
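# e.g. "CREATE INDEX index_users_on_email ON public.users USING btree (email);"
# captures "public.users" (a hypothetical index, used here purely as an illustration).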
ddl_and_comments.match(/\sON\s+((?:[\w"-]+\.)?[\w"-]+)/im)
else # DEFAULT, CONSTRAINT (usually ALTER TABLE)
ddl_and_comments.match(/ALTER TABLE(?: ONLY)?\s+((?:[\w"-]+\.)?[\w"-]+)\s+/)
end
table_ref = match[1] if match && match[1]
if table_ref
target_schema, target_simple = if table_ref.include?(".")
parts = table_ref.delete('"').split(".", 2)
# Ensure parts has two elements, otherwise default schema
(parts.size == 2) ? parts : [schema_name, parts.first]
else
[schema_name, table_ref.delete('"')] # Default to current object's schema
end
qualified_target = "#{target_schema}.#{target_simple}"
if known_tables_map[qualified_target]
# If target table is known, append to its file.
File.open(known_tables_map[qualified_target], "a") { |f| f.puts(full_block_text + "\n\n") }
else
# If target table not found (e.g., for a view not yet processed or a complex reference), put in misc.
warn "Warning: Target table/view '#{qualified_target}' for #{object_type} '#{object_name_with_args}' not found in known_tables_map. Appending to misc."
file_handles[:misc].puts(full_block_text + "\n\n")
end
else
# If table reference couldn't be parsed, put in misc.
warn "Warning: Could not determine target table for #{object_type} '#{object_name_with_args}'. Appending to misc."
file_handles[:misc].puts(full_block_text + "\n\n")
end
when "FUNCTION"
# Defer function processing until all tables are known, as function association
# heuristics rely on the complete `known_tables_map`.
deferred_functions << {
full_block_text: full_block_text,
object_name_simple: object_name_simple, # e.g., my_function
qualified_object_name_simple: qualified_object_name_simple, # e.g., public.my_function
ddl_and_comments: ddl_and_comments, # The CREATE FUNCTION ... statement
object_name_with_args: object_name_with_args, # e.g., my_function(integer, text)
schema_name: schema_name # Schema of the function itself
}
when "COMMENT"
# Comments can be on various object types.
# Comments on functions are deferred. Other comments are processed inline.
paths_to_write_comment = [] # Collect all file paths where this comment should be written.
is_comment_on_function = false
# Check if it's a COMMENT ON FUNCTION specifically for deferral.
if /COMMENT ON FUNCTION\s+((?:[\w"-]+\.)?[\w"-]+(?:\(.*?\))?)/.match?(ddl_and_comments)
is_comment_on_function = true
end
if is_comment_on_function
# Defer comments on functions until functions themselves are processed and their output paths known.
deferred_comments_on_functions << {
full_block_text: full_block_text, # The entire comment block from structure.sql
ddl_and_comments: ddl_and_comments, # The "COMMENT ON FUNCTION..." statement itself
object_name_with_args: object_name_with_args, # Name from comment's header (e.g., "my_function(integer, text)")
schema_name: schema_name # Schema from comment's header
}
else
# Process other comments (TABLE, COLUMN, EXTENSION, SCHEMA, TYPE, SEQUENCE, VIEW, INDEX, etc.) inline.
# Determine the target object of the comment and find its file path.
if (match_data = ddl_and_comments.match(/COMMENT ON TABLE ((?:[\w"-]+\.)?[\w"-]+)/))
table_ref = match_data[1]
schema_t, simple_t = table_ref.include?(".") ?
table_ref.delete('"').split(".", 2) :
[schema_name, table_ref.delete('"')] # Default to comment's schema if not qualified
qname = "#{schema_t}.#{simple_t}"
paths_to_write_comment << known_tables_map[qname].to_s if known_tables_map[qname]
elsif (match_data = ddl_and_comments.match(/COMMENT ON COLUMN ((?:[\w"-]+\.)?[\w"-]+)\.[\w"-]+/))
table_ref = match_data[1] # This is the table part of "schema.table.column" or "table.column"
schema_t, simple_t = table_ref.include?(".") ?
table_ref.delete('"').split(".", 2) :
[schema_name, table_ref.delete('"')]
qname = "#{schema_t}.#{simple_t}"
paths_to_write_comment << known_tables_map[qname].to_s if known_tables_map[qname]
elsif /COMMENT ON EXTENSION/.match?(ddl_and_comments)
# For COMMENT ON EXTENSION, the target name is in object_name_with_args from the header
# We need to find its path in object_output_paths
if object_output_paths[qualified_object_name_simple] && object_output_paths[qualified_object_name_simple][:path]&.any?
paths_to_write_comment.concat(object_output_paths[qualified_object_name_simple][:path])
else
# Fallback if extension wasn't recorded, though it should be.
paths_to_write_comment << file_handles[:extensions].path.to_s
end
elsif /COMMENT ON SCHEMA/.match?(ddl_and_comments)
# For COMMENT ON SCHEMA, target name is in object_name_with_args
if object_output_paths[qualified_object_name_simple] && object_output_paths[qualified_object_name_simple][:path]&.any?
paths_to_write_comment.concat(object_output_paths[qualified_object_name_simple][:path])
else
paths_to_write_comment << file_handles[:schemas].path.to_s
end
elsif (match_data = ddl_and_comments.match(/COMMENT ON (TYPE|SEQUENCE|VIEW|MATERIALIZED VIEW)\s+((?:[\w"-]+\.)?[\w"-]+(?:\(.*?\))?)/))
commented_obj_type = match_data[1]
target_ref = match_data[2].split("(").first.delete('"') # Get name without args
schema_of_target, simple_of_target = if target_ref.include?(".")
target_ref.split(".", 2)
else
[schema_name, target_ref] # Default to comment's schema
end
qname = "#{schema_of_target}.#{simple_of_target}"
if object_output_paths[qname] && object_output_paths[qname][:path]&.any?
paths_to_write_comment.concat(object_output_paths[qname][:path])
else
warn "Warning: Path for commented object #{qname} (type #{commented_obj_type}) not found. Comment: #{object_name_with_args}"
end
elsif (match_data = ddl_and_comments.match(/COMMENT ON (?:INDEX|TRIGGER|CONSTRAINT)\s+[\w"-]+\s+ON\s+((?:[\w"-]+\.)?[\w"-]+)/))
# Comment on TRIGGER/CONSTRAINT: e.g. "COMMENT ON TRIGGER my_trigger ON my_table IS '...'".
# The target is the table. Note that "COMMENT ON INDEX" takes only the index name (no "ON table"
# clause), so index comments will not match here and fall through to the misc warning below.
table_ref = match_data[1] # The table name after ON
schema_t, simple_t = table_ref.include?(".") ?
table_ref.delete('"').split(".", 2) :
[schema_name, table_ref.delete('"')]
qname = "#{schema_t}.#{simple_t}"
if known_tables_map[qname]
paths_to_write_comment << known_tables_map[qname].to_s
end
end
if paths_to_write_comment.any?
paths_to_write_comment.uniq.each do |individual_path_str|
# Check if it's one of the globally managed file handles first
handle_pair = file_handles.find { |_k, fh| fh.path.to_s == individual_path_str.to_s }
if handle_pair
file_handles[handle_pair.first].puts(full_block_text + "\n\n")
else
# Otherwise, it's a table-specific file (or view, etc.)
File.open(individual_path_str, "a") { |f| f.puts(full_block_text + "\n\n") }
end
end
else
# If it's not a comment on function and still no path, then it's misc.
warn "Warning: Could not determine target file for non-function COMMENT: #{object_name_with_args}. DDL: #{ddl_and_comments.lines.first.strip}. Appending to misc."
file_handles[:misc].puts(full_block_text + "\n\n")
end
end # end if is_comment_on_function else
else
# Any object type not explicitly handled above goes to misc.
file_handles[:misc].puts(full_block_text + "\n\n")
end
end # end main parsing loop
# --- Process deferred functions ---
# Now that all TABLEs/VIEWs are in `known_tables_map`, we can process functions
# and try to associate them with the correct table files.
deferred_functions.each do |func_data|
full_block_text = func_data[:full_block_text]
object_name_simple = func_data[:object_name_simple]
qualified_object_name_simple = func_data[:qualified_object_name_simple]
ddl_and_comments = func_data[:ddl_and_comments] # This is the CREATE FUNCTION ...
object_name_with_args = func_data[:object_name_with_args]
schema_name = func_data[:schema_name] # Schema of the function itself
associated_table_files = Set.new # Use a Set to store unique file paths
# Heuristic 1: Function name contains a known table name (case-insensitive).
# e.g., function "update_users_last_login" associated with "users" table.
known_tables_map.each do |q_table_name, table_path|
_tbl_schema, simple_table_name = q_table_name.split(".", 2)
if object_name_simple.downcase.include?(simple_table_name.downcase)
associated_table_files.add(table_path.to_s)
end
end
# Heuristic 2: Function parameter types match known table types (schema.table or table).
# e.g., function "process_order(o public.orders)" associated with "public.orders" table.
if object_name_with_args.include?("(") && object_name_with_args.include?(")")
params_list_str = object_name_with_args[
object_name_with_args.index("(") + 1..object_name_with_args.rindex(")") - 1
]
# Split parameters, handling cases like "integer, character varying"
param_definitions = params_list_str.scan(/(?:[^,(]+(?:\([^)]+\))?)+/) # More robust split
param_definitions.each do |param_def|
# Parameter definition can be "param_name type" or just "type". We need the type.
# Regex to extract the last word part, which is likely the type,
# handling schema-qualified types like "public.my_type" or arrays "text[]".
type_match = param_def.strip.match(/(?:([\w"-]+\.)?[\w"-]+(?:\[\])?)$/)
type_part = type_match[0] if type_match
next if type_part.blank?
potential_q_type_name = if type_part.include?(".")
type_part # Already schema-qualified
else
"#{schema_name}.#{type_part}" # Qualify with function's schema
end
if known_tables_map[potential_q_type_name] # Check if this type is a known table/view
associated_table_files.add(known_tables_map[potential_q_type_name].to_s)
end
end
end
# Heuristic 3: Scan function body (DDL) for mentions of known table names (case-insensitive).
# This is powerful for triggers or functions that manipulate tables not in their name/signature.
known_tables_map.each do |q_table_name, table_path|
_tbl_schema, simple_table_name_from_map = q_table_name.split(".", 2)
# Regex to find the table name as a whole word (quoted or unquoted).
escaped_stn = Regexp.escape(simple_table_name_from_map)
body_scan_regex_str = "\"#{escaped_stn}\"|\\b#{escaped_stn}\\b" # \b for word boundary
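# For a table named "orders", for example, this compiles to /"orders"|\borders\b/i.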
if ddl_and_comments.match?(Regexp.new(body_scan_regex_str, Regexp::IGNORECASE))
associated_table_files.add(table_path.to_s)
end
end
written_function_paths = [] # Record where this function is actually written.
if associated_table_files.empty?
# If no associations found, write to the global functions file.
global_func_path = file_handles[:global_functions].path
file_handles[:global_functions].puts(full_block_text + "\n\n")
written_function_paths << global_func_path.to_s
else
# Write the function to all associated table files.
associated_table_files.each do |table_file_path_str|
File.open(table_file_path_str, "a") { |f| f.puts(full_block_text + "\n\n") }
written_function_paths << table_file_path_str
end
end
# Store the path(s) for comment routing.
object_output_paths[qualified_object_name_simple] = {type: "FUNCTION", path: written_function_paths}
end # end deferred_functions loop
# --- Process deferred comments on functions ---
# Now that functions are processed and their paths recorded in `object_output_paths`,
# we can process comments on these functions.
deferred_comments_on_functions.each do |comment_data|
full_block_text = comment_data[:full_block_text] # The full COMMENT object block
ddl_and_comments = comment_data[:ddl_and_comments] # The "COMMENT ON FUNCTION..." statement
schema_name_of_comment_obj = comment_data[:schema_name] # Schema of the COMMENT object itself
object_name_with_args_of_comment_obj = comment_data[:object_name_with_args] # Name from COMMENT object's header
paths_to_write_comment = []
# Extract the target function name from the "COMMENT ON FUNCTION ..." statement.
if (match_data = ddl_and_comments.match(/COMMENT ON FUNCTION\s+((?:[\w"-]+\.)?[\w"-]+(?:\(.*?\))?)/))
target_ref_with_args = match_data[1] # e.g., "public.my_func(integer)" or "my_func(integer)"
target_ref_simple = target_ref_with_args.split("(").first.delete('"') # e.g., "public.my_func" or "my_func"
# Determine the schema of the target function.
# If "public.my_func", schema is "public". If "my_func", schema is `schema_name_of_comment_obj`.
schema_of_target_func, simple_of_target_func = if target_ref_simple.include?(".")
target_ref_simple.split(".", 2)
else
[schema_name_of_comment_obj, target_ref_simple]
end
qname_of_func = "#{schema_of_target_func}.#{simple_of_target_func}" # Qualified name of the function being commented on
# Look up the function's output path(s).
if object_output_paths[qname_of_func] && object_output_paths[qname_of_func][:path]&.any?
paths_to_write_comment.concat(object_output_paths[qname_of_func][:path])
else
warn "Warning: Path for commented function #{qname_of_func} not found during deferred processing. Comment object: #{object_name_with_args_of_comment_obj}"
end
end
if paths_to_write_comment.any?
paths_to_write_comment.uniq.each do |individual_path_str|
# Check if it's one of the globally managed file handles
handle_pair = file_handles.find { |_k, fh| fh.path.to_s == individual_path_str.to_s }
if handle_pair
file_handles[handle_pair.first].puts(full_block_text + "\n\n")
else
# Otherwise, it's a table-specific file.
File.open(individual_path_str, "a") { |f| f.puts(full_block_text + "\n\n") }
end
end
else
# If comment target couldn't be resolved, put in misc.
warn "Warning: Could not determine target file for deferred COMMENT ON FUNCTION: #{object_name_with_args_of_comment_obj}. DDL: #{ddl_and_comments.lines.first.strip}. Appending to misc."
file_handles[:misc].puts(full_block_text + "\n\n")
end
end # end deferred_comments_on_functions loop
# --- Finalization ---
# Close all initially opened file handles for global files.
file_handles.each_value(&:close)
# Write the extracted schema_migrations content to its own file.
unless schema_migrations_content.empty?
File.write(output_base_dir.join("_schema_migrations.sql"), schema_migrations_content.strip + "\n")
end
# Write the extracted final SET search_path command to its own file.
unless final_set_search_path_content.empty?
File.write(output_base_dir.join("_postamble.sql"), final_set_search_path_content.strip + "\n")
end
puts "db/structure.sql has been split into files in #{output_base_dir}"
puts "Global, non-table-specific items are in the '_global' directory."
puts "Review misc/_misc.sql for any unclassified DDL statements."
puts "Note: Complex function/type to table associations may require manual review."
end
end