Last active
May 18, 2021 16:45
-
-
Save matthew-n/663c8e34247d416c82f247937a47aab6 to your computer and use it in GitHub Desktop.
metadata tools
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE TABLE ##sample_set ( | |
sample_set_id INT NOT NULL IDENTITY(1,1), | |
[database_id] INT NOT NULL, | |
[database_name] NVARCHAR(128) NOT NULL, | |
[table_id] INT NOT NULL, | |
[table_name] NVARCHAR(128) NOT NULL, | |
[column_id] INT NOT NULL, | |
[column_name] NVARCHAR(128) NOT NULL, | |
[stats_id] INT NULL, | |
[status_name] NVARCHAR(128) NULL, | |
[last_update] DATETIME2(7) NULL | |
PRIMARY KEY (sample_set_id), | |
UNIQUE (database_id, table_id, column_id) | |
); | |
CREATE TABLE ##samples ( | |
sample_set_id INT NOT NULL, | |
sample_num INT NOT NULL, | |
sample_value NVARCHAR(4000), | |
PRIMARY KEY(sample_set_id, sample_num) | |
); | |
WITH filtered_columns (object_id,column_id,name) AS | |
( | |
SELECT | |
object_id, | |
column_id, | |
name | |
FROM sys.columns AS c | |
WHERE | |
c.is_identity = 0 AND | |
NOT EXISTS ( | |
-- the column has a fk to an identity column | |
SELECT 1 | |
FROM sys.foreign_key_columns AS fkc | |
JOIN sys.columns AS ref_c ON fkc.referenced_object_id = ref_c.object_id AND fkc.referenced_column_id = ref_c.column_id | |
WHERE | |
fkc.parent_object_id = c.object_id AND | |
fkc.parent_column_id = c.column_id AND | |
ref_c.is_identity=1 | |
) | |
) | |
INSERT INTO ##sample_set | |
SELECT | |
DB_ID() AS [database_id], | |
DB_NAME() AS [database_name], | |
tmp.table_id, | |
tmp.table_name, | |
tmp.column_id, | |
tmp.column_name, | |
tmp.stats_id, | |
tmp.status_name, | |
tmp.last_updated | |
FROM ( | |
SELECT | |
tbl.object_id AS [table_id], | |
tbl.name AS [table_name], | |
c.column_id, | |
c.name AS [column_name], | |
s.stats_id, | |
s.name AS [status_name], | |
sp.last_updated, | |
--priorites the stats a column occurs in pk > index > auto-generated | |
ROW_NUMBER()OVER(PARTITION BY c.object_id, c.column_id ORDER BY s.stats_id) AS rn | |
FROM sys.tables AS tbl | |
JOIN filtered_columns AS c ON c.object_id=tbl.object_id | |
LEFT JOIN ( | |
sys.stats AS s | |
JOIN sys.stats_columns AS sc ON sc.object_id = s.object_id AND sc.stats_id = s.stats_id | |
) ON | |
s.object_id = tbl.object_id AND sc.column_id = c.column_id | |
-- we are instested in the first column of read and complete stats | |
AND sc.stats_column_id = 1 AND s.is_temporary = 0 AND s.has_filter = 0 | |
LEFT JOIN sys.indexes AS i ON i.object_id = tbl.object_id | |
-- stats created by index have this quality | |
AND s.stats_id = i.index_id | |
OUTER APPLY sys.dm_db_stats_properties(s.object_id, s.stats_id) sp | |
) AS tmp | |
WHERE | |
tmp.rn = 1 | |
/* declare variables */ | |
DECLARE | |
@sample_set_id INT, | |
@table_id INT, | |
@stats_id INT; | |
DECLARE sample_set CURSOR FAST_FORWARD READ_ONLY FOR | |
SELECT sample_set_id, table_id, stats_id | |
FROM ##sample_set | |
WHERE | |
stats_id IS NOT NULL AND | |
sample_set_id > COALESCE((SELECT MAX(sample_set_id) FROM ##samples),0) | |
OPEN sample_set | |
FETCH NEXT FROM sample_set INTO @sample_set_id, @table_id, @stats_id | |
WHILE @@FETCH_STATUS = 0 | |
BEGIN | |
INSERT INTO ##samples | |
SELECT | |
@sample_set_id, | |
b.step_number AS sample_number, | |
CAST(b.range_high_key AS NVARCHAR(4000)) AS sample_value | |
FROM sys.dm_db_stats_histogram(@table_id,@stats_id)AS b | |
FETCH NEXT FROM sample_set INTO @sample_set_id, @table_id, @stats_id | |
END | |
CLOSE sample_set | |
DEALLOCATE sample_set | |
/* declare variables */ | |
DECLARE @stmt NVARCHAR(4000); | |
DECLARE take_samples CURSOR FAST_FORWARD READ_ONLY FOR | |
SELECT | |
CONCAT('select ',sample_set_id,', row_number() over(order by ',a.column_name,') , cast(',a.column_name,' as nvarchar(4000)) as sample_value from ',table_name,' tablesample (200 rows)') | |
FROM ##sample_set AS a | |
WHERE | |
a.database_id = DB_ID() AND | |
stats_id IS NULL AND | |
NOT EXISTS (SELECT 1 FROM ##samples AS b WHERE b.sample_set_id = a.sample_set_id); | |
OPEN take_samples | |
FETCH NEXT FROM take_samples INTO @stmt | |
WHILE @@FETCH_STATUS = 0 | |
BEGIN | |
insert INTO ##samples | |
EXEC sys.sp_executesql @stmt; | |
FETCH NEXT FROM take_samples INTO @stmt | |
END | |
CLOSE take_samples | |
DEALLOCATE take_samples | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SET STATISTICS IO, TIME on | |
GO | |
CREATE VIEW metadata.Catalogs | |
AS | |
SELECT | |
o.object_id AS major_id, | |
COALESCE(SubObj.sub_id,0) AS minor_id, | |
QUOTENAME(OBJECT_SCHEMA_NAME(o.object_id)) | |
+ISNULL('.' + QUOTENAME(o.name),'') | |
+ISNULL('.'+ QUOTENAME(SubObj.level2name),'') | |
AS FullName, | |
'SCHEMA' AS level0type, | |
OBJECT_SCHEMA_NAME(o.object_id) AS level0name, | |
CASE o.type_desc | |
WHEN 'VIEW' THEN 'VIEW' | |
WHEN 'USER_TABLE' THEN 'TABLE' | |
WHEN 'SQL_STORED_PROCEDURE' THEN 'PROCEDURE' | |
WHEN 'SQL_SCALAR_FUNCTION' THEN 'FUNCTION' | |
WHEN 'CLR_SCALAR_FUNCTION' THEN 'FUNCTION' | |
WHEN 'SQL_TABLE_VALUED_FUNCTION' THEN 'FUNCTION' | |
WHEN 'SQL_INLINE_TABLE_VALUED_FUNCTION' THEN 'FUNCTION' | |
ELSE o.type_desc | |
END AS level1type, | |
o.name AS level1Name, | |
SubObj.level2type, | |
SubObj.level2name | |
FROM sys.objects AS o | |
LEFT JOIN ( | |
SELECT | |
parent_object_id, object_id, 'CONSTRAINT', name | |
FROM sys.objects | |
WHERE | |
is_ms_shipped =0 AND | |
parent_object_id <>0 | |
UNION ALL | |
SELECT object_id, column_id, 'COLUMN', name FROM sys.columns UNION ALL | |
SELECT object_id, index_id, 'INDEX', name FROM sys.indexes UNION ALL | |
SELECT object_id, parameter_id, 'PARAMETER', name FROM sys.parameters | |
) AS SubObj(parent_id, sub_id, level2type, level2name) | |
ON o.object_id = SubObj.parent_id | |
WHERE | |
o.is_ms_shipped = 0 AND | |
o.parent_object_id = 0 | |
UNION ALL | |
SELECT | |
s.schema_id, | |
0, | |
QUOTENAME(s.name), | |
'SCHEMA', | |
name, | |
NULL, | |
NULL, | |
NULL, | |
NULL | |
FROM sys.schemas AS s | |
UNION ALL | |
SELECT | |
0, | |
0, | |
QUOTENAME(DB_NAME()), | |
'DATABASE', | |
DB_NAME(), | |
NULL, | |
NULL, | |
NULL, | |
NULL | |
GO |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
source: http://www.sqlservice.se/ssms-data-classification-part-2/ | |
*/ | |
SET XACT_ABORT OFF; | |
IF (OBJECT_ID('tempdb..##server_collect') IS NOT NULL) | |
EXEC('DROP TABLE ##server_collect;') | |
GO | |
CREATE TABLE ##server_collect ( | |
[database_id] INT NOT NULL, | |
[object_id] INT NOT NULL, | |
[schema_name] SYSNAME NOT NULL, | |
[table_name] SYSNAME NOT NULL, | |
[column_name] SYSNAME NOT NULL, | |
[type_name] SYSNAME NOT NULL, | |
[max_length] SMALLINT NOT NULL, | |
[scale] SMALLINT NOT NULL, | |
[precision] SMALLINT NOT NULL, | |
[is_in_memory] BIT NULL | |
) | |
DECLARE @cmd NVARCHAR(4000) =N' | |
INSERT INTO ##server_collect | |
SELECT DISTINCT | |
DB_ID() AS database_id, | |
C.object_id, | |
S.NAME AS schema_name, | |
T.NAME AS table_name, | |
C.NAME AS column_name, | |
TP.name as type_name, | |
C.max_length, | |
C.scale, | |
c.precision, | |
COALESCE(OBJECTPROPERTY(t.object_id, ''TableIsMemoryOptimized''),CAST(0 AS BIT)) AS is_in_memory | |
FROM sys.schemas S | |
JOIN sys.tables T ON S.schema_id = T.schema_id | |
JOIN sys.columns C ON T.object_id = C.object_id | |
JOIN sys.types TP ON C.system_type_id = TP.system_type_id; | |
' | |
/** scan the server **/ | |
--SET @cmd = N'use [?];'+CHAR(10)+CHAR(13)+@cmd; | |
--EXEC sys.sp_MSforeachdb @command1 = @cmd | |
--EXEC dbo.sp_foreachdb @command =@cmd, -- nvarchar(max) | |
-- @replacechar = N'?', -- nchar(1) | |
-- @print_dbname = 1, -- bit | |
-- --@print_command_only = 1, -- bit | |
-- @suppress_quotename = 1, -- bit | |
-- @user_only = 1 | |
-- --, @name_pattern = N'' | |
-- --, @database_list = N'' | |
/** single database execute **/ | |
EXEC sys.sp_executesql @cmd; | |
DECLARE @InfoTypeRanking TABLE ( | |
info_type NVARCHAR(128) NOT NULL, | |
info_type_guid uniqueidentifier NOT NULL DEFAULT newid(), | |
ranking INT NOT NULL | |
); | |
DECLARE @Dictionary TABLE ( | |
pattern NVARCHAR(128) NOT NULL, | |
info_type NVARCHAR(128) NOT NULL, | |
sensitivity_label NVARCHAR(128) NOT NULL, | |
can_be_numeric BIT NOT NULL DEFAULT 1 | |
); | |
DECLARE @SensitivityLabel TABLE | |
( | |
sensitivity_label NVARCHAR(128) NOT NULL, | |
sensitivity_label_guid UNIQUEIDENTIFIER NOT NULL DEFAULT newid() | |
); | |
DECLARE @ClassifcationResults TABLE | |
( | |
[database_name] NVARCHAR(128), | |
[schema_name] NVARCHAR(128), | |
table_name NVARCHAR(128), | |
column_name NVARCHAR(128), | |
column_type_info NVARCHAR(256), | |
info_type NVARCHAR(128), | |
sensitivity_label NVARCHAR(128), | |
ranking INT, | |
can_be_numeric BIT, | |
is_in_memory BIT | |
); | |
/* If we get multiple matches we will apply the lowest ranking first | |
So if a column matches on both Name and CreditCard it will be classified as Name | |
Your priorities may differ from Microsoft's */ | |
INSERT INTO @InfoTypeRanking (info_type, ranking) | |
VALUES | |
('Networking', 100), | |
('Contact Info', 200), | |
('Credentials', 300), | |
('Name', 400), | |
('National ID', 500), | |
('SSN', 600), | |
('Credit Card', 700), | |
('Banking', 800), | |
('Financial', 900), | |
('Health', 1000), | |
('Date Of Birth', 1100), | |
('Other', 1200); | |
INSERT INTO @Dictionary (pattern, info_type, sensitivity_label, can_be_numeric) | |
VALUES | |
('%name%', 'Name', 'GDPR', 0), | |
('%ssn%', 'SSN', 'PII', 1), | |
('%acct%', 'Financial', 'PII', 1), | |
('%rtno%', 'Financial', 'PII', 1), | |
('%dob%', 'Date Of Birth', 'PII',1) | |
/* add or adjust as necessary */ | |
; | |
Insert @SensitivityLabel (sensitivity_label) | |
SELECT distinct sensitivity_label from @Dictionary; | |
INSERT INTO @ClassifcationResults | |
SELECT DISTINCT | |
DB_NAME(src.database_id) [database_name], | |
src.[schema_name], | |
src.[table_name], | |
src.[column_name], | |
CONCAT(src.[type_name] , | |
'('+CASE src.[type_name] | |
WHEN 'NVARCHAR' THEN COALESCE( CAST(NULLIF(src.max_length,-1)AS VARCHAR), 'max' ) | |
WHEN 'VARCHAR' THEN COALESCE( CAST(NULLIF(src.max_length,-1)AS VARCHAR), 'max' ) | |
WHEN 'VARBINARY' THEN COALESCE( CAST(NULLIF(src.max_length,-1)AS VARCHAR), 'max' ) | |
WHEN 'char' THEN COALESCE( CAST(NULLIF(src.max_length,-1)AS VARCHAR), 'max' ) | |
WHEN 'datetime2' THEN CAST(src.scale AS VARCHAR) | |
WHEN 'decimal' THEN CONCAT(src.precision,',', src.scale) | |
WHEN 'numeric' THEN CONCAT(src.precision,',', src.scale) | |
END+')') AS column_type_info, | |
D.info_type, | |
D.sensitivity_label, | |
R.ranking, | |
D.can_be_numeric, | |
src.is_in_memory | |
FROM ##server_collect AS src | |
LEFT JOIN @Dictionary D | |
ON ( | |
D.pattern NOT LIKE '%[%]%' | |
AND LOWER(src.[column_name]) = LOWER(D.pattern) COLLATE DATABASE_DEFAULT | |
) | |
OR | |
( | |
D.pattern LIKE '%[%]%' | |
AND LOWER(src.[column_name]) LIKE LOWER(D.pattern) COLLATE DATABASE_DEFAULT | |
) | |
LEFT JOIN @InfoTypeRanking R ON (R.info_type = D.info_type) | |
WHERE | |
(D.info_type IS NOT NULL) | |
AND NOT ( | |
D.can_be_numeric = 0 | |
AND src.[type_name] IN ( 'bigint', 'bit', 'decimal', 'float', 'int', 'money', 'numeric', 'smallint', 'smallmoney', 'tinyint' ) | |
); | |
SELECT | |
ROW_NUMBER() OVER (ORDER BY MR.schema_name, MR.table_name, MR.column_name) AS rnum, | |
CR.database_name, | |
MR.schema_name AS schema_name, | |
MR.table_name AS table_name, | |
MR.column_name AS column_name, | |
CR.is_in_memory AS IsInMemory, | |
CR.column_type_info AS column_type_info, | |
CR.info_type AS information_type_name, | |
CONVERT(VARCHAR(50), ITR.info_type_guid) AS info_type_guid, | |
CR.sensitivity_label AS sensitivity_label_name, | |
CONVERT(VARCHAR(50), SL.sensitivity_label_guid) AS sensitivity_label_guid | |
FROM ( | |
SELECT schema_name, | |
table_name, | |
column_name, | |
MIN(ranking) AS min_ranking | |
FROM @ClassifcationResults | |
GROUP BY schema_name, | |
table_name, | |
column_name | |
) MR | |
INNER JOIN @ClassifcationResults CR | |
ON CR.schema_name = MR.schema_name | |
AND CR.table_name = MR.table_name | |
AND CR.column_name = MR.column_name | |
AND CR.ranking = MR.min_ranking | |
JOIN @InfoTypeRanking ITR ON ITR.info_type = CR.info_type | |
JOIN @SensitivityLabel SL ON SL.sensitivity_label = CR.sensitivity_label | |
ORDER BY | |
CR.database_name,MR.schema_name, MR.table_name, MR.column_name; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--drop table #result_defs | |
select | |
o.object_id as proc_id | |
,o.name as proc_name | |
,x.name | |
,x.column_ordinal | |
,x.is_nullable, x.system_type_id, x.precision, x.scale | |
into #result_defs | |
from sys.objects as o | |
cross apply sys.dm_exec_describe_first_result_set_for_object(o.object_id,1) as x | |
where | |
is_ms_shipped = 0 AND type = 'p' | |
and is_hidden =0; | |
create unique clustered index result_defs_uq on #result_defs(proc_id, column_ordinal); | |
SELECT | |
p.object_id, | |
p.name, | |
result.min_proc_obj as class_out, | |
input.min_proc_obj as class_in, | |
x.* | |
into #results | |
FROM sys.procedures as p | |
OUTER APPLY ( | |
select top 1 | |
y.* | |
from ( | |
values | |
('insert', 1,0,0,0), | |
('get', 0,1,0,0), | |
('getall', 0,1,0,0), | |
('delete', 0, 0, 0, 1), | |
('update', 0,0,1,0), | |
('exists',null,null,null,null), | |
('count',null,null,null,null) | |
) as y(search, [create], [read], [update], [delete]) | |
where | |
charindex(y.search, p.name)> 0 | |
) as x | |
OUTER APPLY ( | |
SELECT TOP 1 | |
MIN(t2.proc_id) AS min_proc_obj | |
FROM #result_defs AS t2 | |
WHERE | |
t2.proc_id <= p.object_id AND | |
EXISTS (SELECT NULL FROM #result_defs as t1 | |
WHERE proc_id = p.object_id AND | |
t1.name = t2.name AND | |
t1.system_type_id = t2.system_type_id AND | |
t1.is_nullable = t2.is_nullable) | |
GROUP BY | |
t2.proc_id | |
HAVING | |
COUNT(*) = (select COUNT(*) from #result_defs where proc_id = p.object_id )AND | |
COUNT(*) = (select COUNT(*) from #result_defs where proc_id = t2.proc_id ) | |
ORDER BY min_proc_obj | |
)as result | |
OUTER APPLY ( | |
SELECT TOP 1 | |
MIN(t2.proc_id) AS min_proc_obj | |
FROM #result_defs AS t2 | |
WHERE | |
t2.proc_id <= p.object_id AND | |
EXISTS (SELECT NULL FROM (select object_id as proc_id, right(name,len(name)-1) as name, system_type_id from sys.parameters) as t1 | |
WHERE proc_id = p.object_id AND | |
t1.name = t2.name AND | |
t1.system_type_id = t2.system_type_id ) | |
GROUP BY | |
t2.proc_id | |
HAVING | |
COUNT(*) = (select COUNT(*) from #result_defs where proc_id = p.object_id )AND | |
COUNT(*) = (select COUNT(*) from #result_defs where proc_id = t2.proc_id ) | |
ORDER BY min_proc_obj | |
) as input | |
WHERE | |
p.is_ms_shipped = 0 AND | |
p.name not like 'sp%' | |
ORDER BY | |
p.name, p.object_id | |
select | |
object_name(class_out), COUNT(*) | |
from #results | |
where | |
class_out is not null | |
and exists ( | |
select NULL from #result_defs | |
where proc_id = #results.object_id | |
having COUNT(*)>1 | |
) | |
group by class_out | |
having COUNT(*) > 1 | |
order by 2 DESC | |
with meh as ( | |
select | |
class_out, | |
COUNT(DISTINCT r.object_id) as use_count | |
FROM #results AS r | |
JOIN #result_defs def on object_id = proc_id | |
where | |
class_out is not null | |
group by | |
class_out | |
having | |
COUNT(DISTINCT r.object_id)>1 AND | |
COUNT(def.column_ordinal) > COUNT(DISTINCT r.object_id) | |
) | |
select | |
* | |
from |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT | |
s.object_id AS [TableID], | |
tbl.name AS [TableName], | |
sc.column_id AS ColumnID, | |
c.name as ColumnName, | |
s.stats_id, | |
s.name AS StatName, | |
sc.stats_column_id, | |
i.index_id, | |
i.name AS IdxName, | |
sp.last_updated, | |
s.auto_created, | |
sp.rows, | |
sp.rows_sampled, | |
sp.unfiltered_rows, | |
sp.modification_counter, | |
sp.steps, | |
sp.persisted_sample_percent, | |
i.is_hypothetical | |
--, i.is_ignored_in_optimization | |
FROM sys.tables AS tbl | |
JOIN sys.columns AS c ON c.object_id=tbl.object_id | |
LEFT JOIN ( | |
sys.stats AS s | |
JOIN sys.stats_columns AS sc ON sc.object_id = s.object_id AND sc.stats_id = s.stats_id | |
) ON | |
s.object_id = tbl.object_id AND sc.column_id = c.column_id | |
LEFT JOIN sys.indexes AS i ON i.object_id = tbl.object_id AND s.stats_id = i.index_id | |
CROSS APPLY sys.dm_db_stats_properties(s.object_id, s.stats_id) sp | |
ORDER BY | |
[TableName],s.stats_id,sc.stats_column_id |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment