db_structure_splitter.rake
require "fileutils" | |
require "active_support/core_ext/string/inflections" | |
namespace :db do | |
  # db:split_structure
  # ------------------
  # This Rake task processes the `db/structure.sql` file generated by Rails (typically from `pg_dump`).
  # Its primary goal is to split this monolithic SQL file into a more manageable, organized
  # set of smaller SQL files. The organization is primarily table-centric, meaning that DDL
  # statements related to a specific table (CREATE TABLE, ALTER TABLE, CREATE INDEX, CREATE TRIGGER,
  # associated functions, comments) are grouped into a single file for that table.
  #
  # Output Structure:
  #   db/structure_split/
  #   ├── tables/                   # Contains one .sql file per table, view, or materialized view
  #   │   ├── public.users.sql
  #   │   ├── public.posts.sql
  #   │   └── ...
  #   ├── _global/                  # Contains DDL for objects not specific to a single table
  #   │   ├── _extensions.sql
  #   │   ├── _schemas.sql
  #   │   ├── _types.sql            # Global ENUMs and other custom types
  #   │   ├── _functions.sql        # Functions not clearly associated with a single table
  #   │   └── _sequences.sql        # Sequences not OWNED BY a table column
  #   ├── misc/                     # Contains DDL that couldn't be classified
  #   │   └── _misc.sql
  #   ├── _preamble.sql             # Initial SET commands and other setup from structure.sql
  #   ├── _schema_migrations.sql    # INSERT statements for schema_migrations table
  #   └── _postamble.sql            # Final SET search_path command
  #
  # Processing Logic:
  # 1. Pre-extraction:
  #    - Extracts `INSERT INTO "schema_migrations"` statements.
  #    - Extracts the final `SET search_path` command.
  #    - Extracts the initial "preamble" (SET commands before the first object definition).
  # 2. Main Parsing Loop:
  #    - Iterates through `structure.sql`, identifying DDL objects based on `pg_dump`'s
  #      `-- Name: ...; Type: ...; Schema: ...;` comment headers.
  #    - Routes DDL for SCHEMAs, EXTENSIONs, TYPEs, TABLEs, VIEWs, MATERIALIZED VIEWs,
  #      SEQUENCEs (owned by tables), INDEXes, TRIGGERs, DEFAULTs, and CONSTRAINTs to
  #      their respective files or the `misc` file.
  #    - Stores TABLE, VIEW, and MATERIALIZED VIEW information in `known_tables_map` for later association.
  #    - Stores output paths for most objects in `object_output_paths`.
  #    - Defers processing of FUNCTIONs and COMMENTs ON FUNCTIONs.
  # 3. Deferred Processing:
  #    - Processes all deferred FUNCTIONs:
  #      - Uses heuristics (name matching, parameter type matching, body content scanning)
  #        to associate functions with one or more tables.
  #      - Writes function DDL to associated table files or to `_global/_functions.sql`.
  #    - Processes all deferred COMMENTs ON FUNCTIONs, writing them to the same file(s)
  #      as their target functions.
  #
  # Key Data Structures:
  # - `known_tables_map`: Maps qualified table/view names to their output file paths.
  # - `object_output_paths`: Maps qualified object names to their type and an array of output file paths.
  # - `deferred_functions`: Stores function DDL and metadata for processing after all tables are known.
  # - `deferred_comments_on_functions`: Stores comment DDL for functions, processed after functions.
  #
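  # Usage (run from the application root; the task loads the Rails environment):
  #   bin/rails db:split_structure
  # (or `bundle exec rake db:split_structure`)
  #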
desc "Splits the db/structure.sql file into per-table files and other categories, with a table-centric organization." | |
task split_structure: :environment do | |
structure_file_path = Rails.root.join("db", "structure.sql") | |
output_base_dir = Rails.root.join("db", "structure_split") | |
unless File.exist?(structure_file_path) | |
warn "Error: #{structure_file_path} not found." | |
exit 1 | |
end | |
puts "Cleaning up and creating output directory: #{output_base_dir}" | |
FileUtils.rm_rf(output_base_dir) | |
# Create base output directories | |
FileUtils.mkdir_p(output_base_dir.join("tables")) | |
FileUtils.mkdir_p(output_base_dir.join("_global")) | |
FileUtils.mkdir_p(output_base_dir.join("misc")) | |
sql_content = File.read(structure_file_path) | |
# --- Phase 1: Extract schema_migrations and final SET search_path --- | |
# These are special cases that are handled separately from the main DDL object parsing. | |
schema_migrations_content = "" | |
final_set_search_path_content = "" | |
# Regex to capture all INSERT INTO "schema_migrations" statements. | |
# These are typically grouped together but might be interspersed with comments. | |
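    # For reference, these usually look something like the following (exact formatting
    # varies by Rails version; the version numbers are illustrative):
    #   INSERT INTO "schema_migrations" (version) VALUES
    #   ('20240101000000'),
    #   ('20240215000000');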
    schema_migrations_regex = /^(INSERT INTO "schema_migrations" .*?;(?:\s*--.*?\n|\s*\n)*)+/m
    if sql_content.match(schema_migrations_regex)
      schema_migrations_content = $1.strip
      sql_content = sql_content.sub(schema_migrations_content, "") # Remove from main content for further processing
    end
    # Extract the final "SET search_path = ..." line, usually at the very end of the file.
    if sql_content =~ /(\nSET search_path = .*?;\s*)$/m
      final_set_search_path_content = $1.strip
      sql_content = sql_content.sub(final_set_search_path_content, "").strip # Remove and clean up
    end

    # --- Phase 2: Process the rest of the content ---
    # Open file handles for global DDL categories. These files will accumulate DDL
    # that isn't specific to a single table or is explicitly global (like extensions).
    file_handles = {
      preamble: File.open(output_base_dir.join("_preamble.sql"), "w"),
      extensions: File.open(output_base_dir.join("_global", "_extensions.sql"), "w"),
      schemas: File.open(output_base_dir.join("_global", "_schemas.sql"), "w"),
      global_types: File.open(output_base_dir.join("_global", "_types.sql"), "w"),
      global_sequences: File.open(output_base_dir.join("_global", "_sequences.sql"), "w"),
      global_functions: File.open(output_base_dir.join("_global", "_functions.sql"), "w"),
      misc: File.open(output_base_dir.join("misc", "_misc.sql"), "w") # Catch-all for unclassified DDL
    }
    # `known_tables_map`: Stores "schema.table_simple_name" => Pathname object of the table's SQL file.
    # Used to quickly find where to write DDL related to a known table (e.g., indexes, triggers, comments).
    known_tables_map = {}
    # `object_output_paths`: Stores qualified object names (e.g., "public.my_type", "public.my_function")
    # as keys. Values are hashes like { type: "TYPE", path: ["/path/to/type_file.sql"] }.
    # The `path` is an array because functions can be written to multiple table files.
    # This map is crucial for routing comments to the correct file(s) where the commented object was written.
    object_output_paths = {}
    # `deferred_functions` and `deferred_comments_on_functions`:
    # Functions and comments on functions are processed *after* the main loop.
    # This is because function-to-table association heuristics rely on `known_tables_map` being fully populated,
    # and comments on functions rely on `object_output_paths` for functions being populated.
    deferred_functions = []
    deferred_comments_on_functions = []
    # Extract the "preamble": SQL commands (mostly SET statements) at the beginning of
    # structure.sql, before the first DDL object definition.
    current_pos = 0
    first_header_match = sql_content.match(/^--\n-- Name:/m) # Standard pg_dump object header
    if first_header_match
      preamble_text = sql_content[0...first_header_match.begin(0)].strip
      file_handles[:preamble].puts(preamble_text + "\n\n") unless preamble_text.empty?
      current_pos = first_header_match.begin(0) # Start main parsing from here
    else
      # If no standard headers are found, assume the whole remaining content is preamble (unlikely for valid structure.sql)
      file_handles[:preamble].puts(sql_content.strip + "\n\n") unless sql_content.strip.empty?
      sql_content = "" # No more content to parse
    end
    # Main parsing loop: Iterates over DDL objects identified by pg_dump's comment headers.
    # Regex captures:
    # 1. object_name_with_args: e.g., "users", "my_function(integer, text)"
    # 2. object_type: e.g., "TABLE", "FUNCTION", "INDEX"
    # 3. schema_name_raw: e.g., "public", "-" (for objects not in a schema, like extensions)
    # 4. ddl_and_comments: The actual SQL DDL statement(s) for the object.
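    # As an illustration, a typical pg_dump object block in structure.sql looks roughly like
    # (table and column names are hypothetical):
    #   --
    #   -- Name: users; Type: TABLE; Schema: public; Owner: -
    #   --
    #
    #   CREATE TABLE public.users (
    #       id bigint NOT NULL
    #   );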
    sql_content[current_pos..].scan(
      /^--\n-- Name: (.*?); Type: (.*?); Schema: (.*?);(?: Owner:.*?)?\n--\n([\s\S]*?)(?=(?:^--\n-- Name:|\z))/m
    ).each do |object_name_with_args, object_type, schema_name_raw, ddl_and_comments|
      # Reconstruct the full block text including the header for writing to files.
      full_block_text = "--\n-- Name: #{object_name_with_args}; Type: #{object_type}; Schema: #{schema_name_raw}; Owner: -\n--\n#{ddl_and_comments.strip}"
      # Normalize names: remove quotes, extract simple name from name with arguments.
      object_name_simple = object_name_with_args.split("(").first.strip.delete('"')
      schema_name = if schema_name_raw.nil? || schema_name_raw.strip == "-" || schema_name_raw.strip.empty?
        "public" # Default to 'public' if schema is not specified or is '-'
      else
        schema_name_raw.strip.delete('"')
      end
      qualified_object_name_simple = "#{schema_name}.#{object_name_simple}"
      # Sanitize names for use in filenames (replace non-alphanumeric characters).
      sanitized_schema_name = schema_name.gsub(/[^a-zA-Z0-9_.-]+/, "_")
      sanitized_object_name_simple = object_name_simple.gsub(/[^a-zA-Z0-9_.-]+/, "_")
      case object_type
      when "SCHEMA"
        file_handles[:schemas].puts(full_block_text + "\n\n")
        # Schemas are global, not typically associated with a single table.
      when "EXTENSION"
        file_handles[:extensions].puts(full_block_text + "\n\n")
        # Extensions are global.
        # Record its path for potential comments ON EXTENSION.
        object_output_paths[qualified_object_name_simple] = {type: "EXTENSION", path: [file_handles[:extensions].path.to_s]}
      when "TYPE"
        # Custom types (ENUMs, etc.) are generally written to a global types file.
        # Associating them reliably with specific tables can be complex.
        output_file_path = file_handles[:global_types].path
        file_handles[:global_types].puts(full_block_text + "\n\n")
        object_output_paths[qualified_object_name_simple] = {type: "TYPE", path: [output_file_path.to_s]}
      when "TABLE", "VIEW", "MATERIALIZED VIEW"
        # Tables, Views, and Materialized Views each get their own file in the 'tables/' directory.
        # They are central to the organization.
        table_file_name = "#{sanitized_schema_name}.#{sanitized_object_name_simple}.sql"
        output_file_path = output_base_dir.join("tables", table_file_name)
        File.open(output_file_path, "a") { |f| f.puts(full_block_text + "\n\n") }
        # Add to known_tables_map for associating other DDL (indexes, triggers, functions, comments).
        known_tables_map[qualified_object_name_simple] = output_file_path
        # Record its path for comments ON TABLE/VIEW.
        object_output_paths[qualified_object_name_simple] = {type: object_type, path: [output_file_path.to_s]}
      when "SEQUENCE"
        # Sequences can be "OWNED BY" a table column, or be global.
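        # Illustrative example of the clause the regex below looks for (hypothetical
        # table and column names):
        #   ... OWNED BY public.users.id;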
        output_file_path = nil # Initialize
        owned_by_match = ddl_and_comments.match(/OWNED BY\s+((?:[\w"-]+\.)?[\w"-]+)\.([\w"-]+);/m)
        if owned_by_match
          # If OWNED BY a table, try to write it to that table's file.
          owner_table_ref, _owner_column = owned_by_match.captures
          owner_schema = owner_table_ref.include?(".") ? owner_table_ref.split(".").first.delete('"') : schema_name
          owner_table_simple = owner_table_ref.split(".").last.delete('"')
          qualified_owner = "#{owner_schema}.#{owner_table_simple}"
          if known_tables_map[qualified_owner]
            output_file_path = known_tables_map[qualified_owner]
            File.open(output_file_path, "a") { |f| f.puts(full_block_text + "\n\n") }
          else
            # If owning table not found (should be rare if structure.sql is ordered), put in global sequences.
            output_file_path = file_handles[:global_sequences].path
            file_handles[:global_sequences].puts(full_block_text + "\n\n")
          end
        else
          # If not OWNED BY, it's a global sequence.
          output_file_path = file_handles[:global_sequences].path
          file_handles[:global_sequences].puts(full_block_text + "\n\n")
        end
        object_output_paths[qualified_object_name_simple] = {type: "SEQUENCE", path: [output_file_path.to_s]}
      when "INDEX", "TRIGGER", "DEFAULT", "CONSTRAINT"
        # These objects are typically ALTER TABLE statements or CREATE INDEX/TRIGGER ON a table.
        # They are associated with a specific table.
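        # Illustrative examples of the statements this branch handles (hypothetical
        # table/index names):
        #   CREATE INDEX index_users_on_email ON public.users USING btree (email);
        #   ALTER TABLE ONLY public.users
        #       ADD CONSTRAINT users_pkey PRIMARY KEY (id);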
        table_ref = nil # Will store the qualified name of the target table.
        # Extract target table name from DDL.
        match = if object_type == "INDEX" || object_type == "TRIGGER"
          # For CREATE INDEX ... ON table_name or CREATE TRIGGER ... ON table_name
          # The regex specifically looks for " ON " (case-insensitive) followed by the table name.
          ddl_and_comments.match(/\sON\s+((?:[\w"-]+\.)?[\w"-]+)/im)
        else # DEFAULT, CONSTRAINT (usually ALTER TABLE)
          ddl_and_comments.match(/ALTER TABLE(?: ONLY)?\s+((?:[\w"-]+\.)?[\w"-]+)\s+/)
        end
        table_ref = match[1] if match && match[1]
        if table_ref
          target_schema, target_simple = if table_ref.include?(".")
            parts = table_ref.delete('"').split(".", 2)
            # Ensure parts has two elements, otherwise default schema
            (parts.size == 2) ? parts : [schema_name, parts.first]
          else
            [schema_name, table_ref.delete('"')] # Default to current object's schema
          end
          qualified_target = "#{target_schema}.#{target_simple}"
          if known_tables_map[qualified_target]
            # If target table is known, append to its file.
            File.open(known_tables_map[qualified_target], "a") { |f| f.puts(full_block_text + "\n\n") }
          else
            # If target table not found (e.g., for a view not yet processed or a complex reference), put in misc.
            warn "Warning: Target table/view '#{qualified_target}' for #{object_type} '#{object_name_with_args}' not found in known_tables_map. Appending to misc."
            file_handles[:misc].puts(full_block_text + "\n\n")
          end
        else
          # If table reference couldn't be parsed, put in misc.
          warn "Warning: Could not determine target table for #{object_type} '#{object_name_with_args}'. Appending to misc."
          file_handles[:misc].puts(full_block_text + "\n\n")
        end
      when "FUNCTION"
        # Defer function processing until all tables are known, as function association
        # heuristics rely on the complete `known_tables_map`.
        deferred_functions << {
          full_block_text: full_block_text,
          object_name_simple: object_name_simple, # e.g., my_function
          qualified_object_name_simple: qualified_object_name_simple, # e.g., public.my_function
          ddl_and_comments: ddl_and_comments, # The CREATE FUNCTION ... statement
          object_name_with_args: object_name_with_args, # e.g., my_function(integer, text)
          schema_name: schema_name # Schema of the function itself
        }
      when "COMMENT"
        # Comments can be on various object types.
        # Comments on functions are deferred. Other comments are processed inline.
        paths_to_write_comment = [] # Collect all file paths where this comment should be written.
        is_comment_on_function = false
        # Check if it's a COMMENT ON FUNCTION specifically for deferral.
        if /COMMENT ON FUNCTION\s+((?:[\w"-]+\.)?[\w"-]+(?:\(.*?\))?)/.match?(ddl_and_comments)
          is_comment_on_function = true
        end
        if is_comment_on_function
          # Defer comments on functions until functions themselves are processed and their output paths known.
          deferred_comments_on_functions << {
            full_block_text: full_block_text, # The entire comment block from structure.sql
            ddl_and_comments: ddl_and_comments, # The "COMMENT ON FUNCTION..." statement itself
            object_name_with_args: object_name_with_args, # Name from comment's header (e.g., "my_function(integer, text)")
            schema_name: schema_name # Schema from comment's header
          }
        else
          # Process other comments (TABLE, COLUMN, EXTENSION, SCHEMA, TYPE, SEQUENCE, VIEW, INDEX, etc.) inline.
          # Determine the target object of the comment and find its file path.
          if (match_data = ddl_and_comments.match(/COMMENT ON TABLE ((?:[\w"-]+\.)?[\w"-]+)/))
            table_ref = match_data[1]
            schema_t, simple_t = table_ref.include?(".") ?
              table_ref.delete('"').split(".", 2) :
              [schema_name, table_ref.delete('"')] # Default to comment's schema if not qualified
            qname = "#{schema_t}.#{simple_t}"
            paths_to_write_comment << known_tables_map[qname].to_s if known_tables_map[qname]
          elsif (match_data = ddl_and_comments.match(/COMMENT ON COLUMN ((?:[\w"-]+\.)?[\w"-]+)\.[\w"-]+/))
            table_ref = match_data[1] # This is the table part of "schema.table.column" or "table.column"
            schema_t, simple_t = table_ref.include?(".") ?
              table_ref.delete('"').split(".", 2) :
              [schema_name, table_ref.delete('"')]
            qname = "#{schema_t}.#{simple_t}"
            paths_to_write_comment << known_tables_map[qname].to_s if known_tables_map[qname]
          elsif /COMMENT ON EXTENSION/.match?(ddl_and_comments)
            # For COMMENT ON EXTENSION, the target name is in object_name_with_args from the header
            # We need to find its path in object_output_paths
            if object_output_paths[qualified_object_name_simple] && object_output_paths[qualified_object_name_simple][:path]&.any?
              paths_to_write_comment.concat(object_output_paths[qualified_object_name_simple][:path])
            else
              # Fallback if extension wasn't recorded, though it should be.
              paths_to_write_comment << file_handles[:extensions].path.to_s
            end
          elsif /COMMENT ON SCHEMA/.match?(ddl_and_comments)
            # For COMMENT ON SCHEMA, target name is in object_name_with_args
            if object_output_paths[qualified_object_name_simple] && object_output_paths[qualified_object_name_simple][:path]&.any?
              paths_to_write_comment.concat(object_output_paths[qualified_object_name_simple][:path])
            else
              paths_to_write_comment << file_handles[:schemas].path.to_s
            end
          elsif (match_data = ddl_and_comments.match(/COMMENT ON (TYPE|SEQUENCE|VIEW|MATERIALIZED VIEW)\s+((?:[\w"-]+\.)?[\w"-]+(?:\(.*?\))?)/))
            commented_obj_type = match_data[1]
            target_ref = match_data[2].split("(").first.delete('"') # Get name without args
            schema_of_target, simple_of_target = if target_ref.include?(".")
              target_ref.split(".", 2)
            else
              [schema_name, target_ref] # Default to comment's schema
            end
            qname = "#{schema_of_target}.#{simple_of_target}"
            if object_output_paths[qname] && object_output_paths[qname][:path]&.any?
              paths_to_write_comment.concat(object_output_paths[qname][:path])
            else
              warn "Warning: Path for commented object #{qname} (type #{commented_obj_type}) not found. Comment: #{object_name_with_args}"
            end
          elsif (match_data = ddl_and_comments.match(/COMMENT ON (?:INDEX|TRIGGER|CONSTRAINT)\s+[\w"-]+\s+ON\s+((?:[\w"-]+\.)?[\w"-]+)/))
            # Comment on INDEX/TRIGGER/CONSTRAINT: "COMMENT ON INDEX my_index ON my_table"
            # The target is the table.
            table_ref = match_data[1] # The table name after ON
            schema_t, simple_t = table_ref.include?(".") ?
              table_ref.delete('"').split(".", 2) :
              [schema_name, table_ref.delete('"')]
            qname = "#{schema_t}.#{simple_t}"
            if known_tables_map[qname]
              paths_to_write_comment << known_tables_map[qname].to_s
            end
          end
          if paths_to_write_comment.any?
            paths_to_write_comment.uniq.each do |individual_path_str|
              # Check if it's one of the globally managed file handles first
              handle_pair = file_handles.find { |_k, fh| fh.path.to_s == individual_path_str.to_s }
              if handle_pair
                file_handles[handle_pair.first].puts(full_block_text + "\n\n")
              else
                # Otherwise, it's a table-specific file (or view, etc.)
                File.open(individual_path_str, "a") { |f| f.puts(full_block_text + "\n\n") }
              end
            end
          else
            # If it's not a comment on function and still no path, then it's misc.
            warn "Warning: Could not determine target file for non-function COMMENT: #{object_name_with_args}. DDL: #{ddl_and_comments.lines.first.strip}. Appending to misc."
            file_handles[:misc].puts(full_block_text + "\n\n")
          end
        end # end if is_comment_on_function else
      else
        # Any object type not explicitly handled above goes to misc.
        file_handles[:misc].puts(full_block_text + "\n\n")
      end
    end # end main parsing loop

    # --- Process deferred functions ---
    # Now that all TABLEs/VIEWs are in `known_tables_map`, we can process functions
    # and try to associate them with the correct table files.
    deferred_functions.each do |func_data|
      full_block_text = func_data[:full_block_text]
      object_name_simple = func_data[:object_name_simple]
      qualified_object_name_simple = func_data[:qualified_object_name_simple]
      ddl_and_comments = func_data[:ddl_and_comments] # This is the CREATE FUNCTION ...
      object_name_with_args = func_data[:object_name_with_args]
      schema_name = func_data[:schema_name] # Schema of the function itself
      associated_table_files = Set.new # Use a Set to store unique file paths
      # Heuristic 1: Function name contains a known table name (case-insensitive).
      # e.g., function "update_users_last_login" associated with "users" table.
      known_tables_map.each do |q_table_name, table_path|
        _tbl_schema, simple_table_name = q_table_name.split(".", 2)
        if object_name_simple.downcase.include?(simple_table_name.downcase)
          associated_table_files.add(table_path.to_s)
        end
      end
      # Heuristic 2: Function parameter types match known table types (schema.table or table).
      # e.g., function "process_order(o public.orders)" associated with "public.orders" table.
      if object_name_with_args.include?("(") && object_name_with_args.include?(")")
        params_list_str = object_name_with_args[
          object_name_with_args.index("(") + 1..object_name_with_args.rindex(")") - 1
        ]
        # Split parameters, handling cases like "integer, character varying"
        param_definitions = params_list_str.scan(/(?:[^,(]+(?:\([^)]+\))?)+/) # More robust split
        param_definitions.each do |param_def|
          # Parameter definition can be "param_name type" or just "type". We need the type.
          # Regex to extract the last word part, which is likely the type,
          # handling schema-qualified types like "public.my_type" or arrays "text[]".
          type_match = param_def.strip.match(/(?:([\w"-]+\.)?[\w"-]+(?:\[\])?)$/)
          type_part = type_match[0] if type_match
          next if type_part.blank?
          potential_q_type_name = if type_part.include?(".")
            type_part # Already schema-qualified
          else
            "#{schema_name}.#{type_part}" # Qualify with function's schema
          end
          if known_tables_map[potential_q_type_name] # Check if this type is a known table/view
            associated_table_files.add(known_tables_map[potential_q_type_name].to_s)
          end
        end
      end
      # Heuristic 3: Scan function body (DDL) for mentions of known table names (case-insensitive).
      # This is powerful for triggers or functions that manipulate tables not in their name/signature.
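      # For example, a trigger function whose body contains "UPDATE public.orders SET ..."
      # would be associated with the "orders" table file (hypothetical table name).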
      known_tables_map.each do |q_table_name, table_path|
        _tbl_schema, simple_table_name_from_map = q_table_name.split(".", 2)
        # Regex to find the table name as a whole word (quoted or unquoted).
        escaped_stn = Regexp.escape(simple_table_name_from_map)
        body_scan_regex_str = "\"#{escaped_stn}\"|\\b#{escaped_stn}\\b" # \b for word boundary
        if ddl_and_comments.match?(Regexp.new(body_scan_regex_str, Regexp::IGNORECASE))
          associated_table_files.add(table_path.to_s)
        end
      end
      written_function_paths = [] # Record where this function is actually written.
      if associated_table_files.empty?
        # If no associations found, write to the global functions file.
        global_func_path = file_handles[:global_functions].path
        file_handles[:global_functions].puts(full_block_text + "\n\n")
        written_function_paths << global_func_path.to_s
      else
        # Write the function to all associated table files.
        associated_table_files.each do |table_file_path_str|
          File.open(table_file_path_str, "a") { |f| f.puts(full_block_text + "\n\n") }
          written_function_paths << table_file_path_str
        end
      end
      # Store the path(s) for comment routing.
      object_output_paths[qualified_object_name_simple] = {type: "FUNCTION", path: written_function_paths}
    end # end deferred_functions loop

    # --- Process deferred comments on functions ---
    # Now that functions are processed and their paths recorded in `object_output_paths`,
    # we can process comments on these functions.
    deferred_comments_on_functions.each do |comment_data|
      full_block_text = comment_data[:full_block_text] # The full COMMENT object block
      ddl_and_comments = comment_data[:ddl_and_comments] # The "COMMENT ON FUNCTION..." statement
      schema_name_of_comment_obj = comment_data[:schema_name] # Schema of the COMMENT object itself
      object_name_with_args_of_comment_obj = comment_data[:object_name_with_args] # Name from COMMENT object's header
      paths_to_write_comment = []
      # Extract the target function name from the "COMMENT ON FUNCTION ..." statement.
      if (match_data = ddl_and_comments.match(/COMMENT ON FUNCTION\s+((?:[\w"-]+\.)?[\w"-]+(?:\(.*?\))?)/))
        target_ref_with_args = match_data[1] # e.g., "public.my_func(integer)" or "my_func(integer)"
        target_ref_simple = target_ref_with_args.split("(").first.delete('"') # e.g., "public.my_func" or "my_func"
        # Determine the schema of the target function.
        # If "public.my_func", schema is "public". If "my_func", schema is `schema_name_of_comment_obj`.
        schema_of_target_func, simple_of_target_func = if target_ref_simple.include?(".")
          target_ref_simple.split(".", 2)
        else
          [schema_name_of_comment_obj, target_ref_simple]
        end
        qname_of_func = "#{schema_of_target_func}.#{simple_of_target_func}" # Qualified name of the function being commented on
        # Look up the function's output path(s).
        if object_output_paths[qname_of_func] && object_output_paths[qname_of_func][:path]&.any?
          paths_to_write_comment.concat(object_output_paths[qname_of_func][:path])
        else
          warn "Warning: Path for commented function #{qname_of_func} not found during deferred processing. Comment object: #{object_name_with_args_of_comment_obj}"
        end
      end
      if paths_to_write_comment.any?
        paths_to_write_comment.uniq.each do |individual_path_str|
          # Check if it's one of the globally managed file handles
          handle_pair = file_handles.find { |_k, fh| fh.path.to_s == individual_path_str.to_s }
          if handle_pair
            file_handles[handle_pair.first].puts(full_block_text + "\n\n")
          else
            # Otherwise, it's a table-specific file.
            File.open(individual_path_str, "a") { |f| f.puts(full_block_text + "\n\n") }
          end
        end
      else
        # If comment target couldn't be resolved, put in misc.
        warn "Warning: Could not determine target file for deferred COMMENT ON FUNCTION: #{object_name_with_args_of_comment_obj}. DDL: #{ddl_and_comments.lines.first.strip}. Appending to misc."
        file_handles[:misc].puts(full_block_text + "\n\n")
      end
    end # end deferred_comments_on_functions loop

    # --- Finalization ---
    # Close all initially opened file handles for global files.
    file_handles.each_value(&:close)
    # Write the extracted schema_migrations content to its own file.
    unless schema_migrations_content.empty?
      File.write(output_base_dir.join("_schema_migrations.sql"), schema_migrations_content.strip + "\n")
    end
    # Write the extracted final SET search_path command to its own file.
    unless final_set_search_path_content.empty?
      File.write(output_base_dir.join("_postamble.sql"), final_set_search_path_content.strip + "\n")
    end
    puts "db/structure.sql has been split into files in #{output_base_dir}"
    puts "Global, non-table-specific items are in the '_global' directory."
    puts "Review misc/_misc.sql for any unclassified DDL statements."
    puts "Note: Complex function/type to table associations may require manual review."
  end
end