Last active
December 16, 2015 18:19
-
-
Save royseto/5476480 to your computer and use it in GitHub Desktop.
Dedup a relational table using SQL delete (tested with PostgreSQL)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
roy-setos-macbook:~ royseto$ ssh -i ~/.ec2/rs-keypair-2.pem [email protected] | |
Welcome to Postgresql, TurnKey Linux 12.0 / Debian 6.0.5 Squeeze | |
System information (as of Sun Apr 28 09:57:02 2013) | |
System load: 0.00 Memory usage: 41% | |
Processes: 62 Swap usage: 0% | |
Usage of /: 7.1% of 9.84GB IP address for eth0: 10.197.60.2 | |
TKLBAM (Backup and Migration): NOT INITIALIZED | |
To initialize TKLBAM, run the "tklbam-init" command to link this | |
system to your TurnKey Hub account. For details see the man page or | |
go to: | |
http://www.turnkeylinux.org/tklbam | |
Last login: Sun Apr 28 09:35:10 2013 from 24.7.89.229 | |
root@postgresql ~# psql postgres postgres | |
psql (8.4.17) | |
Type "help" for help. | |
postgres=# CREATE TABLE t (a INTEGER, b INTEGER); | |
CREATE TABLE | |
postgres=# INSERT INTO t VALUES (1, 2); -- nondup row | |
INSERT 0 1 | |
postgres=# INSERT INTO t VALUES (2, 5); -- nondup row #2 | |
INSERT 0 1 | |
postgres=# INSERT INTO t VALUES (3, 7); -- row with 2 dups | |
INSERT 0 1 | |
postgres=# INSERT INTO t VALUES (3, 7); -- row with 2 dups | |
INSERT 0 1 | |
postgres=# INSERT INTO t VALUES (4, 2); -- row with 3 dups | |
INSERT 0 1 | |
postgres=# INSERT INTO t VALUES (4, 2); -- row with 3 dups | |
INSERT 0 1 | |
postgres=# INSERT INTO t VALUES (4, 2); -- row with 3 dups | |
INSERT 0 1 | |
postgres=# INSERT INTO t VALUES (5, null); -- nondup row containing a null value | |
INSERT 0 1 | |
postgres=# INSERT INTO t VALUES (6, null); -- dup row containing a null value | |
INSERT 0 1 | |
postgres=# INSERT INTO t VALUES (6, null); -- dup row containing a null value | |
INSERT 0 1 | |
postgres=# SELECT * FROM t; | |
a | b | |
---+--- | |
1 | 2 | |
2 | 5 | |
3 | 7 | |
3 | 7 | |
4 | 2 | |
4 | 2 | |
4 | 2 | |
5 | | |
6 | | |
6 | | |
(10 rows) | |
postgres=# -- remove duplicate rows from table t | |
postgres=# DELETE FROM t | |
postgres-# WHERE ctid NOT IN ( | |
postgres(# SELECT MIN(ctid) FROM t GROUP BY a, b | |
postgres(# ); | |
DELETE 4 | |
postgres=# SELECT * FROM t; -- duplicates should be gone now | |
a | b | |
---+--- | |
1 | 2 | |
2 | 5 | |
3 | 7 | |
4 | 2 | |
5 | | |
6 | | |
(6 rows) | |
postgres=# |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
postgres=# TRUNCATE TABLE t; | |
TRUNCATE TABLE | |
postgres=# INSERT INTO t VALUES (1, 2); -- nondup row | |
INSERT 0 1 | |
postgres=# INSERT INTO t VALUES (2, 5); -- nondup row #2 | |
INSERT 0 1 | |
postgres=# INSERT INTO t VALUES (3, 7); -- row with 2 dups | |
INSERT 0 1 | |
postgres=# INSERT INTO t VALUES (3, 7); -- row with 2 dups | |
INSERT 0 1 | |
postgres=# INSERT INTO t VALUES (4, 2); -- row with 3 dups | |
INSERT 0 1 | |
postgres=# INSERT INTO t VALUES (4, 2); -- row with 3 dups | |
INSERT 0 1 | |
postgres=# INSERT INTO t VALUES (4, 2); -- row with 3 dups | |
INSERT 0 1 | |
postgres=# INSERT INTO t VALUES (5, null); -- nondup row containing a null value | |
INSERT 0 1 | |
postgres=# INSERT INTO t VALUES (6, null); -- dup row containing a null value | |
INSERT 0 1 | |
postgres=# INSERT INTO t VALUES (6, null); -- dup row containing a null value | |
INSERT 0 1 | |
postgres=# SELECT * FROM t; | |
a | b | |
---+--- | |
1 | 2 | |
2 | 5 | |
3 | 7 | |
3 | 7 | |
4 | 2 | |
4 | 2 | |
4 | 2 | |
5 | | |
6 | | |
6 | | |
(10 rows) | |
postgres=# SELECT a, b, ROW_NUMBER() OVER (PARTITION BY a, b ORDER BY (SELECT 0)) rn | |
postgres-# FROM t; | |
a | b | rn | |
---+---+---- | |
1 | 2 | 1 | |
2 | 5 | 1 | |
3 | 7 | 1 | |
3 | 7 | 2 | |
4 | 2 | 1 | |
4 | 2 | 2 | |
4 | 2 | 3 | |
5 | | 1 | |
6 | | 1 | |
6 | | 2 | |
(10 rows) | |
postgres=# WITH cte | |
postgres-# AS (SELECT ROW_NUMBER() OVER (PARTITION BY A, B | |
postgres(# ORDER BY (SELECT 0)) RN | |
postgres(# FROM T) | |
postgres-# DELETE FROM cte | |
postgres-# WHERE RN > 1; | |
ERROR: syntax error at or near "DELETE" | |
LINE 5: DELETE FROM cte | |
^ | |
postgres=# |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment