Last active
December 21, 2015 14:58
-
-
Save doug-wade/6323213 to your computer and use it in GitHub Desktop.
Gets the selectivity of all columns in a table or view in a MS SQL database.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
if exists | |
( | |
select * | |
from sys.objects | |
where object_id = OBJECT_ID(N'[dbo].[getColumnSelectivity]') | |
) | |
begin | |
drop procedure dbo.getColumnSelectivity | |
end | |
go | |
create procedure dbo.getColumnSelectivity | |
@table nvarchar(256) | |
as | |
begin | |
if OBJECT_ID(@table) is null | |
begin | |
return | |
end | |
declare @columnsStatement nvarchar(max) = N'select columns.name | |
from sys.columns | |
inner join sys.objects | |
on objects.object_id = columns.object_id | |
where objects.name = @tableName' | |
declare @rowCountStatement nvarchar(max) = N'select @rowCountOUT = count(*) from ' + quotename(@table) | |
declare @columnCountStatement nvarchar(max); | |
declare @columnsTable table | |
( | |
column_to_test varchar(max) | |
,number_of_distinct_values int | |
) | |
declare @rowCount int | |
--This table contains one row per column in the table. | |
insert into @columnsTable | |
( | |
column_to_test | |
) | |
execute sp_executesql @columnsStatement, N'@tableName nvarchar(256)', @tableName = @table | |
--Selectivity is number of distinct values / total row count, so this will be the denominator | |
exec sp_executesql @rowCountStatement, N'@rowCountOUT int OUTPUT', @rowCountOUT = @rowCount OUTPUT | |
--Check each columns selectivity in turn and record it in the table | |
declare columnsCursor cursor | |
for | |
select column_to_test | |
from @columnsTable | |
declare @nextcolumn varchar(max), @nextCount int | |
open columnsCursor | |
fetch next from columnsCursor into @nextcolumn | |
while (@@FETCH_STATUS <> -1) --cursor has reached the end of the array | |
begin | |
set @columnCountStatement = N'select @columnCountOUT = count(distinct ' + quotename(@nextcolumn) + ') from ' + quotename(@table) | |
exec sp_executesql @columnCountStatement | |
,N'@columnCountOUT int OUTPUT' | |
,@columnCountOUT = @nextCount OUTPUT | |
update @columnsTable | |
set number_of_distinct_values = @nextCount | |
where column_to_test = @nextcolumn | |
fetch next from columnsCursor into @nextcolumn | |
end | |
close columnsCursor | |
deallocate columnsCursor | |
select column_to_test | |
,cast(number_of_distinct_values as float) / cast(@rowCount as float) as selectivity | |
from @columnsTable | |
order by cast(number_of_distinct_values as float) / cast(@rowCount as float) desc | |
end | |
go |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment