Last active
September 30, 2016 09:55
-
-
Save klpx/0b517d838d094415e05be34b707488f6 to your computer and use it in GitHub Desktop.
Postgres migration to new unique index
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
We have: | |
*/ | |
CREATE TABLE data (id BIGINT, name VARCHAR); | |
CREATE UNIQUE INDEX ON data (name); | |
INSERT INTO data (id, name) VALUES (1, 'Alex'), (2, 'Bob'), (3, 'alex'), (4, 'AleX'); | |
/* | |
If we put (3, 'Alex') then we get error: | |
*/ | |
INSERT INTO data (id, name) VALUES (5, 'Alex'); | |
--- ERROR: duplicate key value violates unique constraint "data_name_idx" | |
--- DETAIL: Key (name)=(Alex) already exists. | |
/* | |
But we decide that our unique constraint is not so strict and want to migrate and merge entries for new unique constaint | |
Just that is not working | |
*/ | |
CREATE UNIQUE INDEX ON data ((lower(name))); | |
--- ERROR: could not create unique index "data_lower_idx" | |
--- DETAIL: Key (lower(name::text))=(alex) is duplicated. | |
/* | |
Decision is to move duplicates to temp table and then merge it | |
Duplicates we can select using Window Functions | |
*/ | |
WITH duplicates AS (SELECT id, name, row_number() over (PARTITION BY lower(name) ORDER BY id ASC) FROM data) | |
SELECT * FROM duplicates WHERE row_number <> 1; | |
--- id | name | row_number | |
--- ----+------+------------ | |
--- 3 | alex | 2 | |
--- 4 | AleX | 3 | |
--- (2 rows) | |
/* | |
Copy it to special table | |
*/ | |
CREATE TABLE duplicates (duplicate_id BIGINT, original_id BIGINT); | |
INSERT INTO duplicates (duplicate_id, original_id) ( | |
WITH marked AS ( | |
SELECT id, name, | |
row_number() over (PARTITION BY lower(name) | |
ORDER BY id ASC) -- order is important, here | |
FROM variation | |
) | |
SELECT dupl.id, origin.id | |
FROM marked origin | |
LEFT JOIN marked dupl | |
ON lower(dupl.name) = lower(origin.name) -- our unique condition | |
AND dupl.row_number > 1 | |
AND origin.row_number = 1 | |
WHERE origin.row_number = 1 and dupl.row_number > 1); | |
... |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment