Skip to content

Instantly share code, notes, and snippets.

@kmoppel
Created August 5, 2025 23:57
Show Gist options
  • Save kmoppel/8dedcf01917e3fbc33cc31d48dbd3e0f to your computer and use it in GitHub Desktop.
Save kmoppel/8dedcf01917e3fbc33cc31d48dbd3e0f to your computer and use it in GitHub Desktop.
Emulate pg_repack in pure SQL + triggers
#!/bin/bash
set -euo pipefail
SCALE=1200
FF=80
UNLOGGED="unlogged" # set to "" for normal tables
UNLOGGED=""
ROW_CHANGES=100000 # 0.1% of scale 1000
SQL_DEL_TRG=$(cat << "EOF"
-- Trigger function for BEFORE DELETE
CREATE OR REPLACE FUNCTION trg_pgbench_accounts_delete()
RETURNS TRIGGER AS $$
BEGIN
INSERT INTO pgbench_accounts_delete_log (aid) VALUES (OLD.aid);
RETURN OLD;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER pgbench_accounts_deletes_logging
BEFORE DELETE ON pgbench_accounts
FOR EACH ROW
EXECUTE FUNCTION trg_pgbench_accounts_delete();
EOF
)
SQL_MERGE_INS_UPD=$(cat << "EOF"
-- PS ON CONFLICT approach is much faster than separate INSERT + UPDATE
INSERT INTO pgbench_accounts_new (aid, bid, abalance, filler)
SELECT
aid, bid, abalance, filler
FROM pgbench_accounts
WHERE mtime IS NOT NULL
ON CONFLICT (aid) DO UPDATE SET
abalance = EXCLUDED.abalance,
filler = EXCLUDED.filler
WHERE pgbench_accounts_new.abalance IS DISTINCT FROM EXCLUDED.abalance;
EOF
)
SQL_MERGE_DEL=$(cat << "EOF"
DELETE FROM pgbench_accounts_new
WHERE aid IN (
select aid from pgbench_accounts_delete_log
);
EOF
)
PGBENCH_DELETE=$(cat << "EOF"
\set aid random(1, 100000 * :scale)
DELETE FROM pgbench_accounts WHERE aid = :aid;
EOF
)
PGBENCH_SIMPLE_UPDATE=$(cat << "EOF"
-- simple-update: <builtin: simple update>
\set aid random(1, 100000 * :scale)
\set bid random(1, 1 * :scale)
\set tid random(1, 10 * :scale)
\set delta random(-5000, 5000)
BEGIN;
UPDATE pgbench_accounts SET abalance = abalance + :delta , mtime = now() WHERE aid = :aid;
SELECT abalance FROM pgbench_accounts WHERE aid = :aid;
INSERT INTO pgbench_history (tid, bid, aid, delta, mtime) VALUES (:tid, :bid, :aid, :delta, CURRENT_TIMESTAMP);
END;
EOF
)
PGBENCH_SIMPLE_INSERT=$(cat << "EOF"
\set aid random(1, 100000 * :scale)
\set bid random(1, 1 * :scale)
INSERT INTO pgbench_accounts (aid, bid, abalance, filler, mtime)
SELECT -1 * :aid, :bid, 666, 'filler', now()
WHERE NOT EXISTS (select * from pgbench_accounts where aid = -1 * :aid);
EOF
)
echo -e "*** STARTING `date` ***\n"
echo -e "\n* INIT pgbench_accounts *\n"
echo "pgbench -iq -s $SCALE --fillfactor $FF"
if [ -n "$UNLOGGED" ]; then
pgbench -iq --unlogged -s $SCALE --fillfactor $FF
else
pgbench -iq -s $SCALE --fillfactor $FF
fi
echo -e "\npsql -c \"\\dt+ pgbench_accounts\""
psql -c "\dt+ pgbench_accounts"
echo -e "\npgbench -N -M prepared -t $ROW_CHANGES &>/dev/null"
pgbench -N -M prepared -t $ROW_CHANGES &>/dev/null
echo -e "\npsql -c \"vacuum pgbench_accounts\""
psql -c "vacuum pgbench_accounts"
psql -c "create index on pgbench_accounts (bid)"
psql -c "create index on pgbench_accounts (bid, abalance)"
echo -e "\n* CREATE TEMP MIGRATION HELPERS - mtime + pgbench_accounts_delete_log + trigger *\n"
psql -c "alter table pgbench_accounts add mtime timestamp with time zone"
psql -c "create index on pgbench_accounts (mtime) where mtime is not null"
psql -c "drop table if exists pgbench_accounts_delete_log"
psql -c "create $UNLOGGED table pgbench_accounts_delete_log(aid int, deleted_on timestamptz not null default now())"
psql -c "create index on pgbench_accounts_delete_log(aid)"
psql -c "$SQL_DEL_TRG"
echo -e "\n* CLONING *\n"
psql -c "drop table if exists pgbench_accounts_new" -c "create $UNLOGGED table pgbench_accounts_new (LIKE pgbench_accounts)"
echo -e "\npsql -c \"insert into pgbench_accounts_new select * from pgbench_accounts\""
time psql -c "insert into pgbench_accounts_new select * from pgbench_accounts"
echo -e "\n* CLONE INDEXING *\n"
T1=$(date +%s)
psql -c "alter table pgbench_accounts_new add primary key (aid)"
psql -c "create index on pgbench_accounts_new (bid)"
psql -c "create index on pgbench_accounts_new (bid, abalance)"
T2=$(date +%s)
echo -e "\nClone indexed in $((T2-T1)) s\n"
echo -e "\n* SIMULATING APP WORK *\n"
echo "$PGBENCH_DELETE" > /tmp/pgbench_accounts_delete.pgbench
echo "$PGBENCH_SIMPLE_UPDATE" > /tmp/pgbench_accounts_simple_update.pgbench
echo "$PGBENCH_SIMPLE_INSERT" > /tmp/pgbench_accounts_insert.pgbench
echo -e "\npgbench -f /tmp/pgbench_accounts_simple_update.pgbench@10 -f /tmp/pgbench_accounts_delete.pgbench@1 -f /tmp/pgbench_accounts_insert.pgbench@1 -M prepared -t $ROW_CHANGES &>/dev/null"
pgbench -f /tmp/pgbench_accounts_simple_update.pgbench@10 -f /tmp/pgbench_accounts_delete.pgbench@1 -f /tmp/pgbench_accounts_insert.pgbench@1 -M prepared -t $ROW_CHANGES &>/dev/null
psql -c "analyze pgbench_accounts"
echo -e "\n* MERGING - DOWNTIME START *\n"
# TODO in real scenario here need to lock the table, kill apps or disable DML via triggers on pgbench_accounts
DT1=$(date +%s)
psql -c "$SQL_MERGE_INS_UPD"
psql -c "$SQL_MERGE_DEL"
DT2=$(date +%s)
echo -e "\nMerged in $((DT2-DT1)) s\n"
ROWS_OLD=$(psql -XAqtc "select count(*) from pgbench_accounts")
ROWS_NEW=$(psql -XAqtc "select count(*) from pgbench_accounts_new")
SUM_OLD=$(psql -XAqtc "select sum(abalance) from pgbench_accounts")
SUM_NEW=$(psql -XAqtc "select sum(abalance) from pgbench_accounts_new")
echo "ROWS_NEW $ROWS_NEW, ROWS_OLD $ROWS_OLD"
echo "SUM_NEW $SUM_NEW, SUM_OLD $SUM_OLD"
echo -e "\n* VACUUM FULL *"
psql -c "alter table pgbench_accounts set (fillfactor = 100);"
VF1=$(date +%s)
psql -c "vacuum full pgbench_accounts"
VF2=$(date +%s)
echo -e "\nVACUUM FULL duration $((VF2-VF1)) s\n"
echo -e "\n*** SCRIPT FINISHED on `date`***"
# TODO switch tables
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment