Advanced Remove Duplicate Row Query in SQL
-- Algorithm
-- 1. Run a query to get the duplicate counts. Since we're looking for fully
--    cloned rows, we can group the query on all fields in the table. Store
--    the results in a table variable.
--
-- 2. Iterate through the table variable.
--
-- 2a. Use DELETE TOP (x - 1) to remove all but one copy of each duplicated row.
--
-- 3. Done
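--
-- The script assumes a base table shaped roughly like the sketch below
-- (hypothetical; the real application_versions schema isn't part of this
-- gist). Note the lack of a primary key or unique constraint, which is
-- what lets fully cloned rows exist in the first place.
--
-- CREATE TABLE application_versions
-- (
--     application_id int,
--     lock_version int,
--     asset_id varchar(50),
--     customer_id int,
--     received_date datetime,
--     fee_amount money,
--     created_at datetime,
--     updated_at datetime
-- )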
DECLARE @RowsToProcess int
DECLARE @CurrentRow int
DECLARE @Message varchar(MAX)
DECLARE @duplicate_count int
DECLARE @application_id int
DECLARE @lock_version int

DECLARE @DuplicateVersions TABLE
(
    row_id int NOT NULL PRIMARY KEY IDENTITY(1,1),
    duplicate_count int,
    application_id int,
    lock_version int,
    asset_id varchar(50),
    customer_id int,
    received_date datetime,
    fee_amount money,
    created_at datetime,
    updated_at datetime
)

INSERT INTO @DuplicateVersions
(
    duplicate_count,
    application_id,
    lock_version,
    asset_id,
    customer_id,
    received_date,
    fee_amount,
    created_at,
    updated_at
)
SELECT COUNT(*) AS duplicate_count,
       [application_id],
       [lock_version],
       [asset_id],
       [customer_id],
       [received_date],
       [fee_amount],
       [created_at],
       [updated_at]
FROM application_versions
GROUP BY [application_id],
         [lock_version],
         [asset_id],
         [customer_id],
         [received_date],
         [fee_amount],
         [created_at],
         [updated_at]
HAVING COUNT(*) > 1
ORDER BY [application_id] ASC, [lock_version] ASC
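-- For example (hypothetical data), if application_versions held three
-- identical rows for AppID 7 / Lock 2 and two identical rows for
-- AppID 9 / Lock 1, the query above would capture:
--
--   row_id | duplicate_count | application_id | lock_version | ...
--   -------+-----------------+----------------+--------------+-----
--        1 |               3 |              7 |            2 | ...
--        2 |               2 |              9 |            1 | ...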
SELECT @RowsToProcess = COUNT(*) FROM @DuplicateVersions

-- Severity 10 makes RAISERROR purely informational, and WITH NOWAIT flushes
-- the message to the client immediately rather than buffering it, which is
-- what makes it useful for progress reporting inside a long-running loop.
SET @Message = 'Duplicate rows retrieved. Records to process: ' + CAST(@RowsToProcess AS VARCHAR)
RAISERROR (@Message, 10, 1) WITH NOWAIT

SET @CurrentRow = 1

-- Since we're using a DELETE TOP (x) command and we cannot pass a column name
-- into the x parameter, we need to iterate through the table of duplicates and
-- call the delete statement once per row, joining on every column since we are
-- comparing against all values.
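--
-- One subtlety: NULL = NULL evaluates to UNKNOWN in T-SQL, so two cloned rows
-- with a NULL in the same column would never match on a plain equality join.
-- The DELETE below coalesces both sides of each comparison to a sentinel
-- value (-999) before comparing, e.g. (hypothetical values):
--
--   SELECT CASE WHEN NULL = NULL THEN 1 ELSE 0 END                       -- 0
--   SELECT CASE WHEN ISNULL(NULL, -999) = ISNULL(NULL, -999)
--               THEN 1 ELSE 0 END                                        -- 1
--
-- This assumes -999 never shows up as a legitimate value in these columns.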
WHILE @CurrentRow <= @RowsToProcess
BEGIN
    SELECT @duplicate_count = duplicate_count,
           @application_id = application_id,
           @lock_version = lock_version
    FROM @DuplicateVersions AS DuplicateVersions
    WHERE row_id = @CurrentRow

    SET @Message = '[' + CAST(@CurrentRow AS VARCHAR) + ' of ' + CAST(@RowsToProcess AS VARCHAR) + '] AppID: ' + CAST(@application_id AS VARCHAR) + ' / Lock: ' + CAST(@lock_version AS VARCHAR) + ' - ' + CAST((@duplicate_count - 1) AS VARCHAR) + ' Duplicate(s)'
    RAISERROR (@Message, 10, 1) WITH NOWAIT

    DELETE TOP (@duplicate_count - 1) application_versions
    FROM application_versions
    RIGHT JOIN @DuplicateVersions AS DuplicateVersions
        ON ISNULL(DuplicateVersions.application_id, -999) = ISNULL(application_versions.application_id, -999)
        AND ISNULL(DuplicateVersions.lock_version, -999) = ISNULL(application_versions.lock_version, -999)
        AND ISNULL(DuplicateVersions.asset_id, -999) = ISNULL(application_versions.asset_id, -999)
        AND ISNULL(DuplicateVersions.customer_id, -999) = ISNULL(application_versions.customer_id, -999)
        AND ISNULL(DuplicateVersions.received_date, -999) = ISNULL(application_versions.received_date, -999)
        AND ISNULL(DuplicateVersions.fee_amount, -999) = ISNULL(application_versions.fee_amount, -999)
        AND ISNULL(DuplicateVersions.created_at, -999) = ISNULL(application_versions.created_at, -999)
        AND ISNULL(DuplicateVersions.updated_at, -999) = ISNULL(application_versions.updated_at, -999)
    WHERE DuplicateVersions.row_id = @CurrentRow

    SET @CurrentRow = @CurrentRow + 1
END
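For comparison, on SQL Server 2005 and later the same cleanup can be done in a single set-based statement by numbering the clones with ROW_NUMBER() and deleting through an updatable CTE. This is a minimal sketch against the same application_versions table, not part of the script above; it gives up the per-row progress messages the loop provides, but sidesteps the NULL sentinel entirely, since PARTITION BY groups NULLs together:

;WITH NumberedRows AS
(
    SELECT ROW_NUMBER() OVER
           (
               PARTITION BY [application_id], [lock_version], [asset_id],
                            [customer_id], [received_date], [fee_amount],
                            [created_at], [updated_at]
               ORDER BY (SELECT NULL) -- no preference for which copy survives
           ) AS row_num
    FROM application_versions
)
-- Delete every row numbered past 1, keeping exactly one copy per group
DELETE FROM NumberedRows
WHERE row_num > 1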