db_structure_splitter.rake
require "fileutils" | |
require "active_support/core_ext/string/inflections" | |
namespace :db do | |
  # db:split_structure
  # ------------------
  # This Rake task processes the `db/structure.sql` file generated by Rails (typically from `pg_dump`).
  # Its primary goal is to split this monolithic SQL file into a more manageable, organized
  # set of smaller SQL files. The organization is primarily table-centric, meaning that DDL
  # statements related to a specific table (CREATE TABLE, ALTER TABLE, CREATE INDEX, CREATE TRIGGER,
  # associated functions, comments) are grouped into a single file for that table.
  #
  # Output Structure:
  #   db/structure_split/
  #   ├── tables/                   # Contains one .sql file per table, view, or materialized view
  #   │   ├── public.users.sql
  #   │   ├── public.posts.sql
  #   │   └── ...
  #   ├── _global/                  # Contains DDL for objects not specific to a single table
  #   │   ├── _extensions.sql
  #   │   ├── _schemas.sql
  #   │   ├── _types.sql            # Global ENUMs and other custom types
  #   │   ├── _functions.sql        # Functions not clearly associated with a single table
  #   │   └── _sequences.sql        # Sequences not OWNED BY a table column
  #   ├── misc/                     # Contains DDL that couldn't be classified
  #   │   └── _misc.sql
  #   ├── _preamble.sql             # Initial SET commands and other setup from structure.sql
  #   ├── _schema_migrations.sql    # INSERT statements for schema_migrations table
  #   └── _postamble.sql            # Final SET search_path command
  #
  # Processing Logic:
  # 1. Pre-extraction:
  #    - Extracts `INSERT INTO "schema_migrations"` statements.
  #    - Extracts the final `SET search_path` command.
  #    - Extracts the initial "preamble" (SET commands before the first object definition).
  # 2. Main Parsing Loop:
  #    - Iterates through `structure.sql`, identifying DDL objects based on `pg_dump`'s
  #      `-- Name: ...; Type: ...; Schema: ...;` comment headers.
  #    - Routes DDL for SCHEMAs, EXTENSIONs, TYPEs, TABLEs, VIEWs, MATERIALIZED VIEWs,
  #      SEQUENCEs (owned by tables), INDEXes, TRIGGERs, DEFAULTs, and CONSTRAINTs to
  #      their respective files or the `misc` file.
  #    - Stores TABLE, VIEW, and MATERIALIZED VIEW information in `known_tables_map` for later association.
  #    - Stores output paths for most objects in `object_output_paths`.
  #    - Defers processing of FUNCTIONs and COMMENTs ON FUNCTIONs.
  # 3. Deferred Processing:
  #    - Processes all deferred FUNCTIONs:
  #      - Uses heuristics (name matching, parameter type matching, body content scanning)
  #        to associate functions with one or more tables.
  #      - Writes function DDL to associated table files or to `_global/_functions.sql`.
  #    - Processes all deferred COMMENTs ON FUNCTIONs, writing them to the same file(s)
  #      as their target functions.
  #
  # Key Data Structures:
  # - `known_tables_map`: Maps qualified table/view names to their output file paths.
  # - `object_output_paths`: Maps qualified object names to their type and an array of output file paths.
  # - `deferred_functions`: Stores function DDL and metadata for processing after all tables are known.
  # - `deferred_comments_on_functions`: Stores comment DDL for functions, processed after functions.
  #
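  # Usage (run from the application root; the task loads the Rails environment):
  #   bin/rails db:split_structure
  # (or `bundle exec rake db:split_structure`)
  #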
desc "Splits the db/structure.sql file into per-table files and other categories, with a table-centric organization." | |
task split_structure: :environment do | |
structure_file_path = Rails.root.join("db", "structure.sql") | |
output_base_dir = Rails.root.join("db", "structure_split") | |
unless File.exist?(structure_file_path) | |
warn "Error: #{structure_file_path} not found." | |
exit 1 | |
end | |
puts "Cleaning up and creating output directory: #{output_base_dir}" | |
FileUtils.rm_rf(output_base_dir) | |
# Create base output directories | |
FileUtils.mkdir_p(output_base_dir.join("tables")) | |
FileUtils.mkdir_p(output_base_dir.join("_global")) | |
FileUtils.mkdir_p(output_base_dir.join("misc")) | |
sql_content = File.read(structure_file_path) | |
# --- Phase 1: Extract schema_migrations and final SET search_path --- | |
# These are special cases that are handled separately from the main DDL object parsing. | |
schema_migrations_content = "" | |
final_set_search_path_content = "" | |
# Regex to capture all INSERT INTO "schema_migrations" statements. | |
# These are typically grouped together but might be interspersed with comments. | |
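    # For reference, these usually look something like the following (exact formatting
    # varies by Rails version; the version numbers are illustrative):
    #   INSERT INTO "schema_migrations" (version) VALUES
    #   ('20240101000000'),
    #   ('20240215000000');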
    schema_migrations_regex = /^(INSERT INTO "schema_migrations" .*?;(?:\s*--.*?\n|\s*\n)*)+/m
    if sql_content.match(schema_migrations_regex)
      schema_migrations_content = $1.strip
      sql_content = sql_content.sub(schema_migrations_content, "") # Remove from main content for further processing
    end
    # Extract the final "SET search_path = ..." line, usually at the very end of the file.
    if sql_content =~ /(\nSET search_path = .*?;\s*)$/m
      final_set_search_path_content = $1.strip
      sql_content = sql_content.sub(final_set_search_path_content, "").strip # Remove and clean up
    end

    # --- Phase 2: Process the rest of the content ---
    # Open file handles for global DDL categories. These files will accumulate DDL
    # that isn't specific to a single table or is explicitly global (like extensions).
    file_handles = {
      preamble: File.open(output_base_dir.join("_preamble.sql"), "w"),
      extensions: File.open(output_base_dir.join("_global", "_extensions.sql"), "w"),
      schemas: File.open(output_base_dir.join("_global", "_schemas.sql"), "w"),
      global_types: File.open(output_base_dir.join("_global", "_types.sql"), "w"),
      global_sequences: File.open(output_base_dir.join("_global", "_sequences.sql"), "w"),
      global_functions: File.open(output_base_dir.join("_global", "_functions.sql"), "w"),
      misc: File.open(output_base_dir.join("misc", "_misc.sql"), "w") # Catch-all for unclassified DDL
    }
    # `known_tables_map`: Stores "schema.table_simple_name" => Pathname object of the table's SQL file.
    # Used to quickly find where to write DDL related to a known table (e.g., indexes, triggers, comments).
    known_tables_map = {}
    # `object_output_paths`: Stores qualified object names (e.g., "public.my_type", "public.my_function")
    # as keys. Values are hashes like { type: "TYPE", path: ["/path/to/type_file.sql"] }.
    # The `path` is an array because functions can be written to multiple table files.
    # This map is crucial for routing comments to the correct file(s) where the commented object was written.
    object_output_paths = {}
    # `deferred_functions` and `deferred_comments_on_functions`:
    # Functions and comments on functions are processed *after* the main loop.
    # This is because function-to-table association heuristics rely on `known_tables_map` being fully populated,
    # and comments on functions rely on `object_output_paths` for functions being populated.
    deferred_functions = []
    deferred_comments_on_functions = []
    # Extract the "preamble": SQL commands (mostly SET statements) at the beginning of
    # structure.sql, before the first DDL object definition.
    current_pos = 0
    first_header_match = sql_content.match(/^--\n-- Name:/m) # Standard pg_dump object header
    if first_header_match
      preamble_text = sql_content[0...first_header_match.begin(0)].strip
      file_handles[:preamble].puts(preamble_text + "\n\n") unless preamble_text.empty?
      current_pos = first_header_match.begin(0) # Start main parsing from here
    else
      # If no standard headers are found, assume the whole remaining content is preamble (unlikely for valid structure.sql)
      file_handles[:preamble].puts(sql_content.strip + "\n\n") unless sql_content.strip.empty?
      sql_content = "" # No more content to parse
    end
    # Main parsing loop: Iterates over DDL objects identified by pg_dump's comment headers.
    # Regex captures:
    # 1. object_name_with_args: e.g., "users", "my_function(integer, text)"
    # 2. object_type: e.g., "TABLE", "FUNCTION", "INDEX"
    # 3. schema_name_raw: e.g., "public", "-" (for objects not in a schema, like extensions)
    # 4. ddl_and_comments: The actual SQL DDL statement(s) for the object.
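    # As an illustration, a typical pg_dump object block in structure.sql looks roughly like
    # (table and column names are hypothetical):
    #   --
    #   -- Name: users; Type: TABLE; Schema: public; Owner: -
    #   --
    #
    #   CREATE TABLE public.users (
    #       id bigint NOT NULL
    #   );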
    sql_content[current_pos..].scan(
      /^--\n-- Name: (.*?); Type: (.*?); Schema: (.*?);(?: Owner:.*?)?\n--\n([\s\S]*?)(?=(?:^--\n-- Name:|\z))/m
    ).each do |object_name_with_args, object_type, schema_name_raw, ddl_and_comments|
      # Reconstruct the full block text including the header for writing to files.
      full_block_text = "--\n-- Name: #{object_name_with_args}; Type: #{object_type}; Schema: #{schema_name_raw}; Owner: -\n--\n#{ddl_and_comments.strip}"
      # Normalize names: remove quotes, extract simple name from name with arguments.
      object_name_simple = object_name_with_args.split("(").first.strip.delete('"')
      schema_name = if schema_name_raw.nil? || schema_name_raw.strip == "-" || schema_name_raw.strip.empty?
        "public" # Default to 'public' if schema is not specified or is '-'
      else
        schema_name_raw.strip.delete('"')
      end
      qualified_object_name_simple = "#{schema_name}.#{object_name_simple}"
      # Sanitize names for use in filenames (replace non-alphanumeric characters).
      sanitized_schema_name = schema_name.gsub(/[^a-zA-Z0-9_.-]+/, "_")
      sanitized_object_name_simple = object_name_simple.gsub(/[^a-zA-Z0-9_.-]+/, "_")
      case object_type
      when "SCHEMA"
        file_handles[:schemas].puts(full_block_text + "\n\n")
        # Schemas are global, not typically associated with a single table.
      when "EXTENSION"
        file_handles[:extensions].puts(full_block_text + "\n\n")
        # Extensions are global.
        # Record its path for potential comments ON EXTENSION.
        object_output_paths[qualified_object_name_simple] = {type: "EXTENSION", path: [file_handles[:extensions].path.to_s]}
      when "TYPE"
        # Custom types (ENUMs, etc.) are generally written to a global types file.
        # Associating them reliably with specific tables can be complex.
        output_file_path = file_handles[:global_types].path
        file_handles[:global_types].puts(full_block_text + "\n\n")
        object_output_paths[qualified_object_name_simple] = {type: "TYPE", path: [output_file_path.to_s]}
      when "TABLE", "VIEW", "MATERIALIZED VIEW"
        # Tables, Views, and Materialized Views each get their own file in the 'tables/' directory.
        # They are central to the organization.
        table_file_name = "#{sanitized_schema_name}.#{sanitized_object_name_simple}.sql"
        output_file_path = output_base_dir.join("tables", table_file_name)
        File.open(output_file_path, "a") { |f| f.puts(full_block_text + "\n\n") }
        # Add to known_tables_map for associating other DDL (indexes, triggers, functions, comments).
        known_tables_map[qualified_object_name_simple] = output_file_path
        # Record its path for comments ON TABLE/VIEW.
        object_output_paths[qualified_object_name_simple] = {type: object_type, path: [output_file_path.to_s]}
      when "SEQUENCE"
        # Sequences can be "OWNED BY" a table column, or be global.
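        # Illustrative example of the clause the regex below looks for (hypothetical
        # table and column names):
        #   ... OWNED BY public.users.id;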
        output_file_path = nil # Initialize
        owned_by_match = ddl_and_comments.match(/OWNED BY\s+((?:[\w"-]+\.)?[\w"-]+)\.([\w"-]+);/m)
        if owned_by_match
          # If OWNED BY a table, try to write it to that table's file.
          owner_table_ref, _owner_column = owned_by_match.captures
          owner_schema = owner_table_ref.include?(".") ? owner_table_ref.split(".").first.delete('"') : schema_name
          owner_table_simple = owner_table_ref.split(".").last.delete('"')
          qualified_owner = "#{owner_schema}.#{owner_table_simple}"
          if known_tables_map[qualified_owner]
            output_file_path = known_tables_map[qualified_owner]
            File.open(output_file_path, "a") { |f| f.puts(full_block_text + "\n\n") }
          else
            # If owning table not found (should be rare if structure.sql is ordered), put in global sequences.
            output_file_path = file_handles[:global_sequences].path
            file_handles[:global_sequences].puts(full_block_text + "\n\n")
          end
        else
          # If not OWNED BY, it's a global sequence.
          output_file_path = file_handles[:global_sequences].path
          file_handles[:global_sequences].puts(full_block_text + "\n\n")
        end
        object_output_paths[qualified_object_name_simple] = {type: "SEQUENCE", path: [output_file_path.to_s]}
      when "INDEX", "TRIGGER", "DEFAULT", "CONSTRAINT"
        # These objects are typically ALTER TABLE statements or CREATE INDEX/TRIGGER ON a table.
        # They are associated with a specific table.
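        # Illustrative examples of the statements this branch handles (hypothetical
        # table/index names):
        #   CREATE INDEX index_users_on_email ON public.users USING btree (email);
        #   ALTER TABLE ONLY public.users
        #       ADD CONSTRAINT users_pkey PRIMARY KEY (id);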
        table_ref = nil # Will store the qualified name of the target table.
        # Extract target table name from DDL.
        match = if object_type == "INDEX" || object_type == "TRIGGER"
          # For CREATE INDEX ... ON table_name or CREATE TRIGGER ... ON table_name
          # The regex specifically looks for " ON " (case-insensitive) followed by the table name.
          ddl_and_comments.match(/\sON\s+((?:[\w"-]+\.)?[\w"-]+)/im)
        else # DEFAULT, CONSTRAINT (usually ALTER TABLE)
          ddl_and_comments.match(/ALTER TABLE(?: ONLY)?\s+((?:[\w"-]+\.)?[\w"-]+)\s+/)
        end
        table_ref = match[1] if match && match[1]
        if table_ref
          target_schema, target_simple = if table_ref.include?(".")
            parts = table_ref.delete('"').split(".", 2)
            # Ensure parts has two elements, otherwise default schema
            (parts.size == 2) ? parts : [schema_name, parts.first]
          else
            [schema_name, table_ref.delete('"')] # Default to current object's schema
          end
          qualified_target = "#{target_schema}.#{target_simple}"
          if known_tables_map[qualified_target]
            # If target table is known, append to its file.
            File.open(known_tables_map[qualified_target], "a") { |f| f.puts(full_block_text + "\n\n") }
          else
            # If target table not found (e.g., for a view not yet processed or a complex reference), put in misc.
            warn "Warning: Target table/view '#{qualified_target}' for #{object_type} '#{object_name_with_args}' not found in known_tables_map. Appending to misc."
            file_handles[:misc].puts(full_block_text + "\n\n")
          end
        else
          # If table reference couldn't be parsed, put in misc.
          warn "Warning: Could not determine target table for #{object_type} '#{object_name_with_args}'. Appending to misc."
          file_handles[:misc].puts(full_block_text + "\n\n")
        end
      when "FUNCTION"
        # Defer function processing until all tables are known, as function association
        # heuristics rely on the complete `known_tables_map`.
        deferred_functions << {
          full_block_text: full_block_text,
          object_name_simple: object_name_simple, # e.g., my_function
          qualified_object_name_simple: qualified_object_name_simple, # e.g., public.my_function
          ddl_and_comments: ddl_and_comments, # The CREATE FUNCTION ... statement
          object_name_with_args: object_name_with_args, # e.g., my_function(integer, text)
          schema_name: schema_name # Schema of the function itself
        }
      when "COMMENT"
        # Comments can be on various object types.
        # Comments on functions are deferred. Other comments are processed inline.
        paths_to_write_comment = [] # Collect all file paths where this comment should be written.
        is_comment_on_function = false
        # Check if it's a COMMENT ON FUNCTION specifically for deferral.
        if /COMMENT ON FUNCTION\s+((?:[\w"-]+\.)?[\w"-]+(?:\(.*?\))?)/.match?(ddl_and_comments)
          is_comment_on_function = true
        end
        if is_comment_on_function
          # Defer comments on functions until functions themselves are processed and their output paths known.
          deferred_comments_on_functions << {
            full_block_text: full_block_text, # The entire comment block from structure.sql
            ddl_and_comments: ddl_and_comments, # The "COMMENT ON FUNCTION..." statement itself
            object_name_with_args: object_name_with_args, # Name from comment's header (e.g., "my_function(integer, text)")
            schema_name: schema_name # Schema from comment's header
          }
        else
          # Process other comments (TABLE, COLUMN, EXTENSION, SCHEMA, TYPE, SEQUENCE, VIEW, INDEX, etc.) inline.
          # Determine the target object of the comment and find its file path.
          if (match_data = ddl_and_comments.match(/COMMENT ON TABLE ((?:[\w"-]+\.)?[\w"-]+)/))
            table_ref = match_data[1]
            schema_t, simple_t = table_ref.include?(".") ?
              table_ref.delete('"').split(".", 2) :
              [schema_name, table_ref.delete('"')] # Default to comment's schema if not qualified
            qname = "#{schema_t}.#{simple_t}"
            paths_to_write_comment << known_tables_map[qname].to_s if known_tables_map[qname]
          elsif (match_data = ddl_and_comments.match(/COMMENT ON COLUMN ((?:[\w"-]+\.)?[\w"-]+)\.[\w"-]+/))
            table_ref = match_data[1] # This is the table part of "schema.table.column" or "table.column"
            schema_t, simple_t = table_ref.include?(".") ?
              table_ref.delete('"').split(".", 2) :
              [schema_name, table_ref.delete('"')]
            qname = "#{schema_t}.#{simple_t}"
            paths_to_write_comment << known_tables_map[qname].to_s if known_tables_map[qname]
          elsif /COMMENT ON EXTENSION/.match?(ddl_and_comments)
            # For COMMENT ON EXTENSION, the target name is in object_name_with_args from the header
            # We need to find its path in object_output_paths
            if object_output_paths[qualified_object_name_simple] && object_output_paths[qualified_object_name_simple][:path]&.any?
              paths_to_write_comment.concat(object_output_paths[qualified_object_name_simple][:path])
            else
              # Fallback if extension wasn't recorded, though it should be.
              paths_to_write_comment << file_handles[:extensions].path.to_s
            end
          elsif /COMMENT ON SCHEMA/.match?(ddl_and_comments)
            # For COMMENT ON SCHEMA, target name is in object_name_with_args
            if object_output_paths[qualified_object_name_simple] && object_output_paths[qualified_object_name_simple][:path]&.any?
              paths_to_write_comment.concat(object_output_paths[qualified_object_name_simple][:path])
            else
              paths_to_write_comment << file_handles[:schemas].path.to_s
            end
          elsif (match_data = ddl_and_comments.match(/COMMENT ON (TYPE|SEQUENCE|VIEW|MATERIALIZED VIEW)\s+((?:[\w"-]+\.)?[\w"-]+(?:\(.*?\))?)/))
            commented_obj_type = match_data[1]
            target_ref = match_data[2].split("(").first.delete('"') # Get name without args
            schema_of_target, simple_of_target = if target_ref.include?(".")
              target_ref.split(".", 2)
            else
              [schema_name, target_ref] # Default to comment's schema
            end
            qname = "#{schema_of_target}.#{simple_of_target}"
            if object_output_paths[qname] && object_output_paths[qname][:path]&.any?
              paths_to_write_comment.concat(object_output_paths[qname][:path])
            else
              warn "Warning: Path for commented object #{qname} (type #{commented_obj_type}) not found. Comment: #{object_name_with_args}"
            end
          elsif (match_data = ddl_and_comments.match(/COMMENT ON (?:INDEX|TRIGGER|CONSTRAINT)\s+[\w"-]+\s+ON\s+((?:[\w"-]+\.)?[\w"-]+)/))
            # Comment on INDEX/TRIGGER/CONSTRAINT: "COMMENT ON INDEX my_index ON my_table"
            # The target is the table.
            table_ref = match_data[1] # The table name after ON
            schema_t, simple_t = table_ref.include?(".") ?
              table_ref.delete('"').split(".", 2) :
              [schema_name, table_ref.delete('"')]
            qname = "#{schema_t}.#{simple_t}"
            if known_tables_map[qname]
              paths_to_write_comment << known_tables_map[qname].to_s
            end
          end
          if paths_to_write_comment.any?
            paths_to_write_comment.uniq.each do |individual_path_str|
              # Check if it's one of the globally managed file handles first
              handle_pair = file_handles.find { |_k, fh| fh.path.to_s == individual_path_str.to_s }
              if handle_pair
                file_handles[handle_pair.first].puts(full_block_text + "\n\n")
              else
                # Otherwise, it's a table-specific file (or view, etc.)
                File.open(individual_path_str, "a") { |f| f.puts(full_block_text + "\n\n") }
              end
            end
          else
            # If it's not a comment on function and still no path, then it's misc.
            warn "Warning: Could not determine target file for non-function COMMENT: #{object_name_with_args}. DDL: #{ddl_and_comments.lines.first.strip}. Appending to misc."
            file_handles[:misc].puts(full_block_text + "\n\n")
          end
        end # end if is_comment_on_function else
      else
        # Any object type not explicitly handled above goes to misc.
        file_handles[:misc].puts(full_block_text + "\n\n")
      end
    end # end main parsing loop

    # --- Process deferred functions ---
    # Now that all TABLEs/VIEWs are in `known_tables_map`, we can process functions
    # and try to associate them with the correct table files.
    deferred_functions.each do |func_data|
      full_block_text = func_data[:full_block_text]
      object_name_simple = func_data[:object_name_simple]
      qualified_object_name_simple = func_data[:qualified_object_name_simple]
      ddl_and_comments = func_data[:ddl_and_comments] # This is the CREATE FUNCTION ...
      object_name_with_args = func_data[:object_name_with_args]
      schema_name = func_data[:schema_name] # Schema of the function itself
      associated_table_files = Set.new # Use a Set to store unique file paths
      # Heuristic 1: Function name contains a known table name (case-insensitive).
      # e.g., function "update_users_last_login" associated with "users" table.
      known_tables_map.each do |q_table_name, table_path|
        _tbl_schema, simple_table_name = q_table_name.split(".", 2)
        if object_name_simple.downcase.include?(simple_table_name.downcase)
          associated_table_files.add(table_path.to_s)
        end
      end
      # Heuristic 2: Function parameter types match known table types (schema.table or table).
      # e.g., function "process_order(o public.orders)" associated with "public.orders" table.
      if object_name_with_args.include?("(") && object_name_with_args.include?(")")
        params_list_str = object_name_with_args[
          object_name_with_args.index("(") + 1..object_name_with_args.rindex(")") - 1
        ]
        # Split parameters, handling cases like "integer, character varying"
        param_definitions = params_list_str.scan(/(?:[^,(]+(?:\([^)]+\))?)+/) # More robust split
        param_definitions.each do |param_def|
          # Parameter definition can be "param_name type" or just "type". We need the type.
          # Regex to extract the last word part, which is likely the type,
          # handling schema-qualified types like "public.my_type" or arrays "text[]".
          type_match = param_def.strip.match(/(?:([\w"-]+\.)?[\w"-]+(?:\[\])?)$/)
          type_part = type_match[0] if type_match
          next if type_part.blank?
          potential_q_type_name = if type_part.include?(".")
            type_part # Already schema-qualified
          else
            "#{schema_name}.#{type_part}" # Qualify with function's schema
          end
          if known_tables_map[potential_q_type_name] # Check if this type is a known table/view
            associated_table_files.add(known_tables_map[potential_q_type_name].to_s)
          end
        end
      end
      # Heuristic 3: Scan function body (DDL) for mentions of known table names (case-insensitive).
      # This is powerful for triggers or functions that manipulate tables not in their name/signature.
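      # For example, a trigger function whose body contains "UPDATE public.orders SET ..."
      # would be associated with the "orders" table file (hypothetical table name).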
      known_tables_map.each do |q_table_name, table_path|
        _tbl_schema, simple_table_name_from_map = q_table_name.split(".", 2)
        # Regex to find the table name as a whole word (quoted or unquoted).
        escaped_stn = Regexp.escape(simple_table_name_from_map)
        body_scan_regex_str = "\"#{escaped_stn}\"|\\b#{escaped_stn}\\b" # \b for word boundary
        if ddl_and_comments.match?(Regexp.new(body_scan_regex_str, Regexp::IGNORECASE))
          associated_table_files.add(table_path.to_s)
        end
      end
      written_function_paths = [] # Record where this function is actually written.
      if associated_table_files.empty?
        # If no associations found, write to the global functions file.
        global_func_path = file_handles[:global_functions].path
        file_handles[:global_functions].puts(full_block_text + "\n\n")
        written_function_paths << global_func_path.to_s
      else
        # Write the function to all associated table files.
        associated_table_files.each do |table_file_path_str|
          File.open(table_file_path_str, "a") { |f| f.puts(full_block_text + "\n\n") }
          written_function_paths << table_file_path_str
        end
      end
      # Store the path(s) for comment routing.
      object_output_paths[qualified_object_name_simple] = {type: "FUNCTION", path: written_function_paths}
    end # end deferred_functions loop

    # --- Process deferred comments on functions ---
    # Now that functions are processed and their paths recorded in `object_output_paths`,
    # we can process comments on these functions.
    deferred_comments_on_functions.each do |comment_data|
      full_block_text = comment_data[:full_block_text] # The full COMMENT object block
      ddl_and_comments = comment_data[:ddl_and_comments] # The "COMMENT ON FUNCTION..." statement
      schema_name_of_comment_obj = comment_data[:schema_name] # Schema of the COMMENT object itself
      object_name_with_args_of_comment_obj = comment_data[:object_name_with_args] # Name from COMMENT object's header
      paths_to_write_comment = []
      # Extract the target function name from the "COMMENT ON FUNCTION ..." statement.
      if (match_data = ddl_and_comments.match(/COMMENT ON FUNCTION\s+((?:[\w"-]+\.)?[\w"-]+(?:\(.*?\))?)/))
        target_ref_with_args = match_data[1] # e.g., "public.my_func(integer)" or "my_func(integer)"
        target_ref_simple = target_ref_with_args.split("(").first.delete('"') # e.g., "public.my_func" or "my_func"
        # Determine the schema of the target function.
        # If "public.my_func", schema is "public". If "my_func", schema is `schema_name_of_comment_obj`.
        schema_of_target_func, simple_of_target_func = if target_ref_simple.include?(".")
          target_ref_simple.split(".", 2)
        else
          [schema_name_of_comment_obj, target_ref_simple]
        end
        qname_of_func = "#{schema_of_target_func}.#{simple_of_target_func}" # Qualified name of the function being commented on
        # Look up the function's output path(s).
        if object_output_paths[qname_of_func] && object_output_paths[qname_of_func][:path]&.any?
          paths_to_write_comment.concat(object_output_paths[qname_of_func][:path])
        else
          warn "Warning: Path for commented function #{qname_of_func} not found during deferred processing. Comment object: #{object_name_with_args_of_comment_obj}"
        end
      end
      if paths_to_write_comment.any?
        paths_to_write_comment.uniq.each do |individual_path_str|
          # Check if it's one of the globally managed file handles
          handle_pair = file_handles.find { |_k, fh| fh.path.to_s == individual_path_str.to_s }
          if handle_pair
            file_handles[handle_pair.first].puts(full_block_text + "\n\n")
          else
            # Otherwise, it's a table-specific file.
            File.open(individual_path_str, "a") { |f| f.puts(full_block_text + "\n\n") }
          end
        end
      else
        # If comment target couldn't be resolved, put in misc.
        warn "Warning: Could not determine target file for deferred COMMENT ON FUNCTION: #{object_name_with_args_of_comment_obj}. DDL: #{ddl_and_comments.lines.first.strip}. Appending to misc."
        file_handles[:misc].puts(full_block_text + "\n\n")
      end
    end # end deferred_comments_on_functions loop

    # --- Finalization ---
    # Close all initially opened file handles for global files.
    file_handles.each_value(&:close)
    # Write the extracted schema_migrations content to its own file.
    unless schema_migrations_content.empty?
      File.write(output_base_dir.join("_schema_migrations.sql"), schema_migrations_content.strip + "\n")
    end
    # Write the extracted final SET search_path command to its own file.
    unless final_set_search_path_content.empty?
      File.write(output_base_dir.join("_postamble.sql"), final_set_search_path_content.strip + "\n")
    end
    puts "db/structure.sql has been split into files in #{output_base_dir}"
    puts "Global, non-table-specific items are in the '_global' directory."
    puts "Review misc/_misc.sql for any unclassified DDL statements."
    puts "Note: Complex function/type to table associations may require manual review."
  end
end