Skip to content

Instantly share code, notes, and snippets.

@mbourgon
Last active December 28, 2020 05:33
Show Gist options
  • Save mbourgon/9fea07d68cf3233190af to your computer and use it in GitHub Desktop.
Save mbourgon/9fea07d68cf3233190af to your computer and use it in GitHub Desktop.
T-SQL: A more efficient delete (uses TOP, CTE, OUTPUT, etc)
--mdb 2016/08/18 v1.10. Added a few things to make it faster and not delete the entire table. And some docs!
--No idea where the idea to walk the table comes from (Bertrand? Strate?); the premise was that if you knew
-- what you were currently deleting, and it was ordered, you could tell the optimizer to skip the part you've already deleted.
-- The overall delete for the top 5k from a CTE is from Simon Sabin & the SQLCat team.
--https://web.archive.org/web/20090630064721/http://blogs.msdn.com/sqlcat/archive/2009/05/21/fast-ordered-delete.aspx?
--http://sqlblogcasts.com/blogs/simons/archive/2009/05/22/DELETE-TOP-x-rows-avoiding-a-table-scan.aspx
IF OBJECT_ID('tempdb..#mydeleted') IS NOT NULL
DROP TABLE #mydeleted
CREATE TABLE #mydeleted
(insert_datetime DATETIME)
CREATE CLUSTERED INDEX cidx_mydeleted_insert_datetime ON #mydeleted (insert_datetime)
DECLARE @msg NVARCHAR(50),
@insert_datetime DATETIME,
@delete_before DATETIME
SELECT @delete_before = DATEADD(dd, DATEDIFF(dd,0,getdate()-14), 0)
SELECT @insert_datetime = ISNULL(MAX(INSERT_datetime), '1/1/2000') --ISNULL for the first run
FROM #mydeleted
SELECT 'Starting delete of data from ' +CONVERT(VARCHAR(20),@insert_datetime,120) + ' up to ' + CONVERT(VARCHAR(20),@delete_before,120) --DO NOT CHANGE TO PRINT
--sets @@ROWCOUNT
WHILE (
@@ROWCOUNT <> 0
AND GETDATE() < '20160818 23:50:00' --so it stops before backups run, preventing a TLOG blowout
AND @insert_datetime <= @delete_before
)
BEGIN
--get that most recent deleted!
SELECT @insert_datetime = ISNULL(MAX(INSERT_datetime), '1/1/2000') --ISNULL for the first run
FROM #mydeleted
TRUNCATE TABLE #mydeleted
--informative so you know where you are in the process.
PRINT @insert_datetime
--this SHOULD force SSMS to tell you almost instantly when it's doing another batch.
--http://thebakingdba.blogspot.com/2011/11/use-raiserror-to-return-results-in-ssms.html
SET @msg = ( SELECT CONVERT( VARCHAR (19), GETDATE (), 120) )
RAISERROR (@msg, 0, 1) WITH NOWAIT
--1 second between batches to prevent too much contention
WAITFOR DELAY '00:00:01';
WITH cte
AS (
SELECT TOP (5000) *
--my EPR saves this daily so I can do trending
FROM dm_db_index_usage_stats_hist
--databases that I can't change and that are REALLY chatty
WHERE database_name IN ('TableA', 'TableB')
AND Insert_Datetime >= @insert_datetime
AND insert_datetime < @delete_before
ORDER BY insert_datetime
)
DELETE FROM cte
OUTPUT deleted.Insert_Datetime
INTO #mydeleted
END
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment