chelseadole · October 1, 2023 19:36
diff --git a/duplicate_to_partitioned_table.sql b/duplicate_to_partitioned_table.sql
 -- This function is designed to duplicate all live INSERTS/UPDATES/DELETES from one table (referred to as "source_table_name")
 -- to a second partitioned table (referred to as "destination_table_name"). The function should be set to trigger after insert/
 -- update/delete on the source table. 

 -- This function is designed to be leveraged for partitioned table migration through this method:
 --    1) Create an empty partitioned copy of the "source_table_name". Alter the primary key as necessary, as unique/primary
 --       keys on partitioned Postgres tables must include every partition key column.
 --    2) Create the following function, and attach it as a trigger to "source_table_name". At this point, incoming new DML is
 --       being copied successfully to the partitioned table, so only historical data will need to be backfilled. 
 --    3) Target rows in "source_table_name" with an updated_at value BEFORE the trigger was attached, and backfill them into 
 --       "destination_table_name" through bulk inserts. When conflict on primary key occurs, do nothing -- as this means that
 --       live traffic has concurrently loaded the value to "destination_table_name", and should take priority. Backfill may be
 --       completed via a script, or directly on Postgres using Postgres functions. 

 -- The following assumptions are made for the purposes of this function:
 --    1) The "destination_table_name" has a valid PRIMARY KEY (PK) that includes the partition key columns. This PK may be
 --       utilized by live traffic to perform "INSERT ... ON CONFLICT ..." (aka, "upsert") queries.
 --    2) The "source_table_name" has fields called "id", "updated_at" (timestamp), "created_at" (timestamp), and "value"
 --       (any type, used as an example).

 -- NOTE: THIS IS AN EXAMPLE. REAL TABLE/COLUMN NAMES MUST BE CORRECTED, AND IT IS LIKELY THAT THE FUNCTION DEFINITION WILL
 --       REQUIRE CHANGE FOR REAL APPLICATIONS

 -- Trigger function that mirrors each row-level INSERT/UPDATE/DELETE on the source table into
 -- <destination_table_name>. Intended to be attached as an AFTER ... FOR EACH ROW trigger.
 --
 -- Returns NEW for INSERT/UPDATE and OLD for DELETE. Any error raised here (e.g. a primary key
 -- violation on the destination) aborts the statement that fired the trigger on the source table.
 CREATE OR REPLACE FUNCTION duplicate_to_partitioned_table()
 RETURNS TRIGGER AS
 $$
 BEGIN
     IF (TG_OP = 'INSERT') THEN
         -- 1) Insert row. Because the table has a PRIMARY KEY, it is important that this fails if a row for the
         --    PK already exists.
         --    NOTE: VALUES (NEW.*) maps columns by position, so the destination table must declare its columns
         --    in the same order as the source table.
         INSERT INTO <destination_table_name> VALUES (NEW.*);
         RETURN NEW;
     ELSIF (TG_OP = 'UPDATE') THEN
         -- 2) Upsert into new table. If the row does not exist yet (not backfilled), write the full new row.
         --    If it already exists, update only the columns that should change. "created_at" is deliberately
         --    left out of the SET list so the destination keeps its existing value.
         --    <primary_key> is the comma-separated list of PK columns; the conflict target must be parenthesized.
         INSERT INTO <destination_table_name> VALUES (NEW.*)
         ON CONFLICT (<primary_key>) DO UPDATE
         SET value = NEW.value, updated_at = NEW.updated_at;
         RETURN NEW;
     ELSIF (TG_OP = 'DELETE') THEN
         -- 3) Delete row.
         --    NOTE(review): this matches on "id" only. If the destination PK has additional columns (e.g. the
         --    partition key), consider matching on all of them -- TODO confirm against the real schema.
         DELETE FROM <destination_table_name> WHERE id = OLD.id;
         RETURN OLD;
     END IF;
 END;
 $$ LANGUAGE PLPGSQL;

 -- Attach the duplication function to the source table so it fires once per affected row, after the
 -- INSERT/UPDATE/DELETE has been applied to <source_table_name>.
 -- EXECUTE FUNCTION is the current spelling (PostgreSQL 11+); EXECUTE PROCEDURE is the deprecated form.
 CREATE TRIGGER duplicate_to_partitioned_table_trigger
 AFTER INSERT OR UPDATE OR DELETE ON <source_table_name>
 FOR EACH ROW EXECUTE FUNCTION duplicate_to_partitioned_table();
	-- This function is designed to duplicate all live INSERTS/UPDATES/DELETES from one table (referred to as "source_table_name")
	-- to a second partitioned table (referred to as "destination_table_name"). The function should be set to trigger after insert/
	-- update/delete on the source table.

	-- This function is designed to be leveraged for partitioned table migration through this method:
	-- 1) Create an empty partitioned copy of the "source_table_name". Alter the primary key as necessary, as unique/primary
	-- keys on partitioned Postgres tables must include every partition key column.
	-- 2) Create the following function, and attach it as a trigger to "source_table_name". At this point, incoming new DML is
	-- being copied successfully to the partitioned table, so only historical data will need to be backfilled.
	-- 3) Target rows in "source_table_name" with an updated_at value BEFORE the trigger was attached, and backfill them into
	-- "destination_table_name" through bulk inserts. When conflict on primary key occurs, do nothing -- as this means that
	-- live traffic has concurrently loaded the value to "destination_table_name", and should take priority. Backfill may be
	-- completed via a script, or directly on Postgres using Postgres functions.

	-- The following assumptions are made for the purposes of this function:
	-- 1) The "destination_table_name" has a valid PRIMARY KEY (PK) that includes the partition key columns. This PK may be
	-- utilized by live traffic to perform "INSERT ... ON CONFLICT ..." (aka, "upsert") queries.
	-- 2) The "source_table_name" has fields called "id", "updated_at" (timestamp), "created_at" (timestamp), and "value"
	-- (any type, used as an example).

	-- NOTE: THIS IS AN EXAMPLE. REAL TABLE/COLUMN NAMES MUST BE CORRECTED, AND IT IS LIKELY THAT THE FUNCTION DEFINITION WILL
	-- REQUIRE CHANGE FOR REAL APPLICATIONS

	-- Trigger function that mirrors each row-level INSERT/UPDATE/DELETE on the source table into
	-- <destination_table_name>. Intended to be attached as an AFTER ... FOR EACH ROW trigger.
	--
	-- Returns NEW for INSERT/UPDATE and OLD for DELETE. Any error raised here (e.g. a primary key
	-- violation on the destination) aborts the statement that fired the trigger on the source table.
	CREATE OR REPLACE FUNCTION duplicate_to_partitioned_table()
	RETURNS TRIGGER AS
	$$
	BEGIN
	    IF (TG_OP = 'INSERT') THEN
	        -- 1) Insert row. Because the table has a PRIMARY KEY, it is important that this fails if a row for the
	        --    PK already exists.
	        --    NOTE: VALUES (NEW.*) maps columns by position, so the destination table must declare its columns
	        --    in the same order as the source table.
	        INSERT INTO <destination_table_name> VALUES (NEW.*);
	        RETURN NEW;
	    ELSIF (TG_OP = 'UPDATE') THEN
	        -- 2) Upsert into new table. If the row does not exist yet (not backfilled), write the full new row.
	        --    If it already exists, update only the columns that should change. "created_at" is deliberately
	        --    left out of the SET list so the destination keeps its existing value.
	        --    <primary_key> is the comma-separated list of PK columns; the conflict target must be parenthesized.
	        INSERT INTO <destination_table_name> VALUES (NEW.*)
	        ON CONFLICT (<primary_key>) DO UPDATE
	        SET value = NEW.value, updated_at = NEW.updated_at;
	        RETURN NEW;
	    ELSIF (TG_OP = 'DELETE') THEN
	        -- 3) Delete row.
	        --    NOTE(review): this matches on "id" only. If the destination PK has additional columns (e.g. the
	        --    partition key), consider matching on all of them -- TODO confirm against the real schema.
	        DELETE FROM <destination_table_name> WHERE id = OLD.id;
	        RETURN OLD;
	    END IF;
	END;
	$$ LANGUAGE PLPGSQL;

	-- Attach the duplication function to the source table so it fires once per affected row, after the
	-- INSERT/UPDATE/DELETE has been applied to <source_table_name>.
	-- EXECUTE FUNCTION is the current spelling (PostgreSQL 11+); EXECUTE PROCEDURE is the deprecated form.
	CREATE TRIGGER duplicate_to_partitioned_table_trigger
	AFTER INSERT OR UPDATE OR DELETE ON <source_table_name>
	FOR EACH ROW EXECUTE FUNCTION duplicate_to_partitioned_table();