Last active
December 18, 2015 00:59
-
-
Save Dillie-O/5700776 to your computer and use it in GitHub Desktop.
Refactored Advanced Remove Duplicate Row Script in SQL
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Algorithm -- | |
-- 1. Run query to get all duplicate count values. Since we're getting cloned | |
-- rows, we can run the query based off of all fields in the table. Store | |
-- in table variable. | |
-- | |
-- 2. Iterate through table. | |
-- | |
-- 2a. Use DELETE TOP(x - 1) remove all duplicates. | |
-- | |
-- 3. Done | |
DECLARE @RowsToProcess int | |
DECLARE @CurrentRow int | |
DECLARE @Message varchar(MAX) | |
DECLARE @duplicate_count int | |
DECLARE @application_id int | |
DECLARE @lock_version int | |
DECLARE @asset_id varchar(50), | |
DECLARE @customer_id int, | |
DECLARE @received_date datetime, | |
DECLARE @fee_amount money, | |
DECLARE @created_at datetime, | |
DECLARE @updated_at datetime | |
DECLARE @DuplicateVersions TABLE | |
( | |
row_id int NOT NULL PRIMARY KEY IDENTITY(1,1), | |
duplicate_count int, | |
application_id int, | |
lock_version int, | |
asset_id varchar(50), | |
customer_id int, | |
received_date datetime, | |
fee_amount money, | |
created_at datetime, | |
updated_at datetime | |
) | |
INSERT INTO @DuplicateVersions | |
( | |
duplicate_count, | |
application_id, | |
lock_version, | |
asset_id, | |
customer_id, | |
received_date, | |
fee_amount, | |
created_at, | |
updated_at | |
) | |
SELECT COUNT(application_id) AS duplicate_count, | |
[application_id], | |
[lock_version], | |
[asset_id], | |
[customer_id], | |
[received_date], | |
[fee_amount], | |
[created_at], | |
[updated_at] | |
FROM application_versions | |
GROUP BY [application_id], | |
[lock_version], | |
[asset_id], | |
[customer_id], | |
[received_date], | |
[fee_amount], | |
[created_at], | |
[updated_at] | |
HAVING COUNT(application_id) > 1 | |
ORDER BY [application_id] ASC, [lock_version] ASC | |
SELECT @RowsToProcess = COUNT(row_id) FROM @DuplicateVersions | |
SET @Message = 'Duplicate rows retrieved. Records to process: ' + CAST(@RowsToProcess AS VARCHAR) | |
RAISERROR (@Message, 10, 1) WITH NOWAIT | |
SET @CurrentRow = 1 | |
WHILE @CurrentRow <= @RowsToProcess | |
BEGIN | |
SELECT @duplicate_count = duplicate_count, | |
@application_id = application_id, | |
@lock_version = lock_version, | |
@asset_id = asset_id, | |
@customer_id = customer_id, | |
@received_date = received_date, | |
@fee_amount = fee_amount, | |
@created_at = created_at, | |
@updated_at = updated_at | |
FROM @DuplicateVersions AS DuplicateVersions | |
WHERE row_id = @CurrentRow | |
SET @Message = '[' + CAST(@CurrentRow AS VARCHAR) + ' of ' + CAST(@RowsToProcess AS VARCHAR) + '] AppID: ' + CAST(@application_id AS VARCHAR) + ' / Lock: ' + CAST(@lock_version AS VARCHAR) + ' - ' + CAST((@duplicate_count - 1) AS VARCHAR) + ' Duplicate(s)' | |
RAISERROR (@Message, 10, 1) WITH NOWAIT | |
-- Since we're using a DELETE TOP (x) command is handled by selecting the | |
-- proper duplicate count - 1 based on our initial query. Since we do this | |
-- as a subquery within our DELETE statement we can match based on column, | |
-- making sure to test for NULL values. | |
DELETE application_versions FROM | |
( | |
SELECT TOP (@duplicate_count - 1) id | |
FROM application_versions | |
WHERE (@application_id IS NULL OR application_id = @application_id) | |
AND (@lock_version IS NULL OR lock_version = @lock_version) | |
AND (@asset_id IS NULL OR asset_id = @asset_id) | |
AND (@customer_id IS NULL OR customer_id = @customer_id) | |
AND (@received_date IS NULL OR received_date = @received_date) | |
AND (@fee_amount IS NULL OR fee_amount = @fee_amount) | |
AND (@created_at IS NULL OR created_at = @created_at) | |
AND (@updated_at IS NULL OR updated_at = @updated_at) | |
) AS t1 | |
WHERE application_versions.id = t1.id | |
SET @CurrentRow = @CurrentRow + 1 | |
END |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment