Last active
June 20, 2025 17:35
-
-
Save BrentOzar/9488ef7039b778de7162884306f6a59d to your computer and use it in GitHub Desktop.
The Ascending Key Problem
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
The Ascending Key Problem | |
Brent Ozar - https://www.BrentOzar.com/go/ascending | |
v1.4 - 2025-06-20 | |
This demo requires: | |
* Any supported version of SQL Server | |
* A large Stack Overflow database: https://www.BrentOzar.com/go/querystack | |
This first RAISERROR is just to make sure you don't accidentally hit F5 and | |
run the entire script. You don't need to run this: | |
*/ | |
RAISERROR(N'Oops! No, don''t just hit F5. Run these demos one at a time.', 20, 1) WITH LOG; | |
GO | |
/* Prepare the database, takes 5-10 minutes depending | |
on your hardware & database size. */ | |
USE StackOverflow; | |
GO | |
DropIndexes; | |
GO | |
ALTER DATABASE [StackOverflow] SET COMPATIBILITY_LEVEL = 110 /* 2012 */ | |
DBCC TRACEOFF(2389); | |
DBCC TRACEOFF(4139); | |
GO | |
CREATE INDEX LastAccessDate ON dbo.Users(LastAccessDate); | |
CREATE INDEX Reputation ON dbo.Users(Reputation); | |
CREATE INDEX OwnerUserId ON dbo.Posts(OwnerUserId); | |
CREATE INDEX Score ON dbo.Posts(Score); | |
GO | |
CREATE OR ALTER PROC dbo.UserLogin @LoginDate DATETIME AS | |
BEGIN | |
UPDATE dbo.Users | |
SET LastAccessDate = DATEADD(SECOND, CAST(RAND(CHECKSUM(NEWID())) AS FLOAT) * 86400, @LoginDate ) | |
WHERE Id = ABS(CHECKSUM(NEWID())) % 10000000 + 1 | |
END | |
GO | |
/* Check out data distribution on Users.LastAccessDate: */ | |
DBCC SHOW_STATISTICS('dbo.Users', 'LastAccessDate'); | |
/* A more human-readable way to see the distribution: */ | |
SELECT FORMAT(LastAccessDate, 'yyyy-MM') AS LastAccessDate, | |
SUM(1) AS Recs | |
FROM dbo.Users | |
GROUP BY FORMAT(LastAccessDate, 'yyyy-MM') | |
ORDER BY FORMAT(LastAccessDate, 'yyyy-MM'); | |
GO | |
/* Top users in the database right now: */ | |
SELECT TOP 100 * FROM dbo.Users ORDER BY LastAccessDate DESC; | |
GO | |
/* If I ask for the most recent users: */ | |
SELECT COUNT(*) FROM dbo.Users | |
WHERE LastAccessDate >= '2024-04-07' | |
OPTION (RECOMPILE); | |
GO | |
/* SQL Server assumes there is only 1 row, because it | |
doesn't know (yet) that the data is constantly ascending. | |
Let's simulate thousands of logins with SQLQueryStress: */ | |
CREATE OR ALTER PROC dbo.UserLogin @LoginDate DATETIME AS | |
BEGIN | |
UPDATE dbo.Users | |
SET LastAccessDate = DATEADD(SECOND, CAST(RAND(CHECKSUM(NEWID())) AS FLOAT) * 86400, @LoginDate ) | |
WHERE Id = ABS(CHECKSUM(NEWID())) % 10000000 + 1 | |
END | |
GO | |
/* SQLQueryStress: 1000 executions times 10 threads | |
of EXEC UserLogin '2024-04-07' */ | |
/* Check our estimates again: */ | |
/* If I ask for the most recent users: */ | |
SELECT COUNT(*) FROM dbo.Users | |
WHERE LastAccessDate >= '2024-04-07' | |
OPTION (RECOMPILE); | |
GO | |
/* Since 2005, SQL Server has watched stats updates to | |
figure out if a stat is having an ascending key problem. | |
You can see it via undocumented trace flag 2388: */ | |
DBCC TRACEON(2388); | |
DBCC SHOW_STATISTICS('dbo.Users', 'LastAccessDate'); | |
DBCC TRACEOFF(2388); | |
/* There's a row in there for each stats update. | |
Update stats manually: */ | |
UPDATE STATISTICS dbo.Users(LastAccessDate) WITH FULLSCAN; | |
GO | |
/* Check again: */ | |
DBCC TRACEON(2388); | |
DBCC SHOW_STATISTICS('dbo.Users', 'LastAccessDate'); | |
DBCC TRACEOFF(2388); | |
/* Then kick off another round of logins, | |
but this time for 2024-04-08: | |
EXEC UserLogin '2024-04-08' | |
Then update stats again and check the leading column type: | |
*/ | |
UPDATE STATISTICS dbo.Users(LastAccessDate) WITH FULLSCAN; | |
GO | |
/* Check again: */ | |
DBCC TRACEON(2388); | |
DBCC SHOW_STATISTICS('dbo.Users', 'LastAccessDate'); | |
DBCC TRACEOFF(2388); | |
/* Then kick off another round of logins, | |
but this time for 2024-04-09: | |
EXEC UserLogin '2024-04-09' | |
Then update stats again and check the leading column type: | |
*/ | |
UPDATE STATISTICS dbo.Users(LastAccessDate) WITH FULLSCAN; | |
GO | |
/* Check again: */ | |
DBCC TRACEON(2388); | |
DBCC SHOW_STATISTICS('dbo.Users', 'LastAccessDate'); | |
DBCC TRACEOFF(2388); | |
/* Voila! SQL Server has figured out that this statistic | |
is suffering from the Ascending Key Problem. | |
But does that by itself help anything? | |
Kick off another round of logins, this time for 2024-04-10: | |
EXEC UserLogin '2024-04-10' | |
Then test our query: | |
*/ | |
SELECT COUNT(*) FROM dbo.Users | |
WHERE LastAccessDate >= '2024-04-10' | |
OPTION (RECOMPILE); | |
GO | |
/* Who cares though? | |
So what if the estimate is bad? | |
Well, let's add another 10K * 10 logins (100K | |
altogether), using SQLQueryStress: | |
EXEC UserLogin '2024-04-10' | |
Then let's simulate a reporting query that adds | |
just one join to another table: */ | |
CREATE OR ALTER PROC dbo.usp_Report | |
@LastAccessDateStart DATETIME, | |
@LastAccessDateEnd DATETIME | |
AS | |
SELECT TOP 101 u.LastAccessDate, u.DisplayName, | |
u.Location, p.Score, p.Title | |
FROM dbo.Users u | |
INNER JOIN dbo.Posts p ON u.Id = p.OwnerUserId | |
WHERE u.LastAccessDate >= @LastAccessDateStart | |
AND u.LastAccessDate < @LastAccessDateEnd | |
ORDER BY p.Score DESC; | |
GO | |
/* That report is driven off Users.LastAccessDate: */ | |
DBCC SHOW_STATISTICS('dbo.Users', 'LastAccessDate'); | |
/* If we call it for a day with very little data: */ | |
EXEC dbo.usp_Report '2008-08-01', '2008-08-02' WITH RECOMPILE; | |
/* For a day with a lot of user logins: */ | |
EXEC dbo.usp_Report '2024-04-06', '2024-04-07' WITH RECOMPILE; | |
/* For a more recent date that doesn't have data yet...: */ | |
EXEC dbo.usp_Report '2025-01-01', '2025-01-02' WITH RECOMPILE; | |
GO | |
/* For a date when the stats are wrong: */ | |
EXEC dbo.usp_Report '2024-04-10', '2024-04-11' WITH RECOMPILE; | |
GO | |
/* So far, I've been using 2012 compatibility level. | |
Let's try 2014: */ | |
ALTER DATABASE [StackOverflow] SET COMPATIBILITY_LEVEL = 120 /* 2014 */ | |
GO | |
EXEC dbo.usp_Report '2024-04-10', '2024-04-11' WITH RECOMPILE; | |
GO | |
/* Try 2025: */ | |
ALTER DATABASE [StackOverflow] SET COMPATIBILITY_LEVEL = 170 /* 2025 */ | |
GO | |
EXEC dbo.usp_Report '2024-04-10', '2024-04-11' WITH RECOMPILE; | |
GO | |
/* In theory, SQL Server's automatic stats updates | |
will kick in eventually, fixing this problem. | |
How many updates will have to be done before | |
automatic stats updates can be triggered? | |
Here's the formula: | |
https://learn.microsoft.com/en-us/sql/relational-databases/statistics/statistics#statistics-options | |
Get the number of rows in the object: | |
*/ | |
DBCC SHOW_STATISTICS('dbo.Users', 'LastAccessDate'); | |
DECLARE @Rows BIGINT = 22484235; | |
SELECT CAST(LEAST(500 + (0.20 * @Rows), SQRT(1000 * @Rows)) AS INT) | |
/* How many rows have been modified so far? Check modification_counter. */ | |
SELECT sp.* | |
FROM sys.stats s | |
CROSS APPLY sys.dm_db_stats_properties (object_id('dbo.Users'), s.stats_id) sp | |
WHERE s.object_id = object_id('dbo.Users') | |
AND s.name = 'LastAccessDate'; | |
/* Use SQLQueryStress to update another ~70,000 rows (to be safe) | |
and check the modification counter again to see if we're up | |
over the threshold: */ | |
SELECT sp.* | |
FROM sys.stats s | |
CROSS APPLY sys.dm_db_stats_properties (object_id('dbo.Users'), s.stats_id) sp | |
WHERE s.object_id = object_id('dbo.Users') | |
AND s.name = 'LastAccessDate'; | |
/* Try our query again: */ | |
ALTER DATABASE [StackOverflow] SET COMPATIBILITY_LEVEL = 170 | |
GO | |
SELECT COUNT(*) FROM dbo.Users | |
WHERE LastAccessDate >= '2024-04-10' | |
OPTION (RECOMPILE); | |
GO | |
/* Well... it's not one row, but it's not accurate | |
either. We're getting sampling: */ | |
DBCC SHOW_STATISTICS('dbo.Users', 'LastAccessDate'); | |
GO | |
/* We can get that to be more accurate by using | |
FULLSCAN, and making sure that all future updates | |
automatically use FULLSCAN too. | |
Update another 10K rows for the next day (I'm not | |
going to explain why it's the next day just yet): | |
EXEC UserLogin '2024-04-11' | |
Then update stats with fullscan, persist: */ | |
UPDATE STATISTICS dbo.Users(LastAccessDate) | |
WITH FULLSCAN, PERSIST_SAMPLE_PERCENT = ON; | |
/* Check the stat's persisted_sample_percent: */ | |
SELECT sp.* | |
FROM sys.stats s | |
CROSS APPLY sys.dm_db_stats_properties (object_id('dbo.Users'), s.stats_id) sp | |
WHERE s.object_id = object_id('dbo.Users') | |
AND s.name = 'LastAccessDate'; | |
/* Now that we've had FULLSCAN, are the estimates accurate? */ | |
ALTER DATABASE [StackOverflow] SET COMPATIBILITY_LEVEL = 170 /* 2025 */ | |
GO | |
SELECT COUNT(*) FROM dbo.Users | |
WHERE LastAccessDate >= '2024-04-11' | |
OPTION (RECOMPILE); | |
GO | |
/* Why aren't the statistics accurate? */ | |
DBCC SHOW_STATISTICS('dbo.Users', 'LastAccessDate'); | |
/* We're hitting the 201 buckets problem. */ | |
/* Back to the Ascending Key Problem. | |
SQL Server knows no rows have been modified since | |
the statistics update: */ | |
SELECT sp.* | |
FROM sys.stats s | |
CROSS APPLY sys.dm_db_stats_properties (object_id('dbo.Users'), s.stats_id) sp | |
WHERE s.object_id = object_id('dbo.Users') | |
AND s.name = 'LastAccessDate'; | |
/* With 0 modified rows, what happens if we query | |
for a future date where we don't have data yet? */ | |
SELECT COUNT(*) FROM dbo.Users | |
WHERE LastAccessDate >= '2024-04-12' | |
OPTION (RECOMPILE); | |
GO | |
/* Modify 10K rows with SQLQueryStress for 2024-04-12: | |
EXEC UserLogin '2024-04-12' | |
Then query again: */ | |
SELECT COUNT(*) FROM dbo.Users | |
WHERE LastAccessDate >= '2024-04-12' | |
OPTION (RECOMPILE); | |
GO | |
/* The estimate isn't accurate because: | |
* The key is marked ascending | |
* The modification_counter > 0 | |
* SQL Server assumes that some of those modified rows | |
must be ascending beyond the histogram's highest step | |
*/ | |
DBCC TRACEON(2388); | |
DBCC SHOW_STATISTICS('dbo.Users', 'LastAccessDate'); | |
DBCC TRACEOFF(2388); | |
SELECT sp.* | |
FROM sys.stats s | |
CROSS APPLY sys.dm_db_stats_properties (object_id('dbo.Users'), s.stats_id) sp | |
WHERE s.object_id = object_id('dbo.Users') | |
AND s.name = 'LastAccessDate'; | |
/* | |
License: Creative Commons Attribution-ShareAlike 4.0 Unported (CC BY-SA 4.0) | |
More info: https://creativecommons.org/licenses/by-sa/4.0/ | |
You are free to: | |
* Share - copy and redistribute the material in any medium or format | |
* Adapt - remix, transform, and build upon the material for any purpose, even | |
commercially | |
Under the following terms: | |
* Attribution - You must give appropriate credit, provide a link to the license, | |
and indicate if changes were made. You may do so in any reasonable manner, | |
but not in any way that suggests the licensor endorses you or your use. | |
* ShareAlike - If you remix, transform, or build upon the material, you must | |
distribute your contributions under the same license as the original. | |
* No additional restrictions — You may not apply legal terms or technological | |
measures that legally restrict others from doing anything the license permits. | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment