Skip to content

Instantly share code, notes, and snippets.

@j-thepac
Last active February 21, 2025 10:39
Show Gist options
  • Save j-thepac/00dd4a3eaa3a6a36a92223ee873f5361 to your computer and use it in GitHub Desktop.
Save j-thepac/00dd4a3eaa3a6a36a92223ee873f5361 to your computer and use it in GitHub Desktop.
Import Queries Azure SQL
/* Performance */
-- Do analysis using below queries
-- Also SQL logs in UI
- Open Azure Portal Homepage
- Search for Dedicated SQL pool
- Click on the Dedicated SQL Pool > Monitoring > metrice > filter DWU used - note down time
- Goto Query Activity and corelaetd the query executed in the above time
/* Table ,Distribution , etc */
with DataDistribution as (
SELECT
s.name as [Schema Name]
,t.name as [Table Name]
,tp.[distribution_policy_desc] as [Distribution Policy Name]
,sum([row_count]) as [Table Row Count]
,max(row_count) as [Max Distribution Row Count]
,min(row_count) as [Min Distribution Row Count]
,avg(row_count) as [Avg Distribution Row Count]
from sys.schemas s
JOIN sys.tables t ON s.[schema_id] = t.[schema_id]
JOIN sys.pdw_table_distribution_properties tp ON t.[object_id] = tp.[object_id]
JOIN sys.pdw_table_mappings tm ON t.[object_id] = tm.[object_id]
JOIN sys.pdw_nodes_tables nt ON tm.[physical_name] = nt.[name]
JOIN sys.dm_pdw_nodes pn ON nt.[pdw_node_id] = pn.[pdw_node_id]
JOIN sys.pdw_distributions di ON nt.[distribution_id] = di.[distribution_id]
JOIN sys.dm_pdw_nodes_db_partition_stats nps ON nt.[object_id] = nps.[object_id]
AND nt.[pdw_node_id] = nps.[pdw_node_id]
AND nt.[distribution_id] = nps.[distribution_id]
AND s.name = 'cca_pricing'
GROUP BY s.name
,t.name
,tp.[distribution_policy_desc]
)
Select [Schema Name],
[Table Name],
[Distribution Policy Name],
[Table Row Count],
[Max Distribution Row Count],
[Min Distribution Row Count],
[Avg Distribution Row Count],
CASE WHEN [Table Row Count] = 0 then -1
else abs([Max Distribution Row Count] * 1.0 - [Min Distribution Row Count]*1.0) / [Max Distribution Row Count] * 100.0
END as [Table Skew Percent]
FROM DataDistribution order by [Table Row Count] desc
/* Each Col of a Table*/
SELECT
sm.[name] AS [schema_name]
, tb.[name] AS [table_name]
, st.[name] AS [stats_name]
, st.[filter_definition] AS [stats_filter_definition]
, st.[has_filter] AS [stats_is_filtered]
, STATS_DATE(st.[object_id],st.[stats_id])AS [stats_last_updated_date]
, co.[name] AS [stats_column_name]
, ty.[name] AS [column_type]
, co.[max_length] AS [column_max_length]
, co.[precision] AS [column_precision]
, co.[scale] AS [column_scale]
, co.[is_nullable] AS [column_is_nullable]
, co.[collation_name] AS [column_collation_name]
, QUOTENAME(sm.[name])+'.'+QUOTENAME(tb.[name])AS two_part_name
, QUOTENAME(DB_NAME())+'.'+QUOTENAME(sm.[name])+'.'+QUOTENAME(tb.[name])
AS three_part_name
FROM sys.objects AS ob
JOIN sys.stats AS st ON ob.[object_id] = st.[object_id]
JOIN sys.stats_columns AS sc ON st.[stats_id] = sc.[stats_id] AND st.[object_id] = sc.[object_id]
JOIN sys.columns AS co ON sc.[column_id] = co.[column_id] AND sc.[object_id] = co.[object_id]
JOIN sys.types AS ty ON co.[user_type_id] = ty.[user_type_id]
JOIN sys.tables AS tb ON co.[object_id] = tb.[object_id]
JOIN sys.schemas AS sm ON tb.[schema_id] = sm.[schema_id]
WHERE 1=1
AND tb.[name] = 'table';
/* Get longest Running Users ( highest elapsed time, which can be indicative of high CPU usage: sql) */
WITH
RequestStats AS (
SELECT
req.request_id,
req.start_time,
req.end_time,
req.total_elapsed_time,
req.status,
req.session_id,
s.login_name,
req.resource_class,
req.[label],
req.[command],
s.app_name,
s.query_count,
ROW_NUMBER() OVER (ORDER BY req.total_elapsed_time DESC) AS rn
FROM sys.dm_pdw_exec_requests AS req
JOIN sys.dm_pdw_exec_sessions AS s ON req.session_id = s.session_id
WHERE req.start_time >= DATEADD(HOUR, -24, GETDATE()) -- Filter for the last 24 hours
)
SELECT
rn,
status,
[label],
[command],
app_name,
query_count,
login_name,
total_elapsed_time AS elapsed_in_ms,
total_elapsed_time/(1000*60) AS elapsed_in_m,
request_id,
start_time,
end_time,
resource_class,
session_id
FROM RequestStats
WHERE 1=1
--request_id in ('QID347522159',' QID347569125',' QID347569449 ','QID347595345 ',' QID347595346')
--command like '%CCA%' or command like 'CREATE EXTERNAL %'
AND rn <= 20 -- Get the top 10 requests by elapsed time
ORDER BY total_elapsed_time DESC;
-----also check
select [label],command,* from sys.dm_pdw_exec_requests
where 1=1 AND request_id in ('QIDxxxx',' QID3xxxx',' QID347569449 ','QID347595345 ',' QID347595346')
--command like 'COPY INTO%'
--or command like '%CCA%'
command like 'CREATE EXTERNAL TABLE%'
order by total_elapsed_time desc
----2 .IDentify Steps
select * from sys.dm_pdw_request_steps where request_id in ( 'QID347595345' ,'QID347595346','QID347595289','QID347595288')
--- 3. Data processed
SELECT r.[request_id], r.[status], r.resource_class, r.command,
sum(bytes_processed) AS bytes_processed, sum(rows_processed) AS rows_processed
FROM sys.dm_pdw_exec_requests r
JOIN sys.dm_pdw_dms_workers w ON r.[request_id] = w.request_id
--WHERE [label] = 'COPY_WITH_CONTEXT: server:schema%'and type = 'WRITER'
GROUP BY r.[request_id], r.[status],r.resource_class, r.command
Having r.[request_id] in ( 'QID347595345' ,'QID347595346','QID347595289','QID347595288')
-- 4 .Check Errors
select * from sys.dm_pdw_errors where [request_id] in ( 'QID347595345' ,'QID347595346','QID347595289','QID347595288')
-- Waits
SELECT * FROM sys.dm_pdw_waits -- Long Queries
/* Fix performance Issue
> slowiness in read - add hash distribution
60 distributions. For balanced performance,
the rows in your distributed table should be spread evenly across all the distributions.
> slownesss in write (temp tables ) - PartitionBy Data in spark before writing
*/
CREATE TABLE dbo.yourTable2
WITH (CLUSTERED COLUMNSTORE INDEX,DISTRIBUTION = HASH ( yourColumn ) )
AS
SELECT * FROM dbo.yourTable;
DROP TABLE dbo.yourTable;
RENAME OBJECT dbo.yourTable2 TO yourTable;
--Also
- CREATE MATERIALIZED VIEW mv WITH (distribution = hash(c), FOR_APPEND) AS SELECT * FROM T ;
- Data with snapshot is created
- If query result is saved , data can be queird faster
- REFRESH MATERIALIZED VIEW employee_summary; -- Data is static and needs to be refreshed
/* Access */
---Create new
IF NOT EXISTS (SELECT * FROM sys.databases WHERE name = 'dbSox')
BEGIN
EXEC('CREATE DATABASE [dbSox]');
END;
CREATE MASTER KEY ENCRYPTION BY PASSWORD = 'pass';
CREATE DATABASE SCOPED CREDENTIAL SynapseMI WITH IDENTITY = 'Managed Identity'
Create EXTERNAL DATA SOURCE [ez] WITH (LOCATION = N'abfss://[email protected]/', CREDENTIAL = [SynapseMI])
Create EXTERNAL DATA SOURCE [dls] WITH (LOCATION = N'abfss://[email protected]/', CREDENTIAL = [SynapseMI])
IF NOT EXISTS (SELECT * FROM sys.external_file_formats WHERE name = 'DELTA') CREATE EXTERNAL FILE FORMAT [DELTA] WITH (FORMAT_TYPE = DELTA);
IF NOT EXISTS (SELECT * FROM sys.external_file_formats WHERE name = 'PARQUET') CREATE EXTERNAL FILE FORMAT [PARQUET] WITH (FORMAT_TYPE = PARQUET);
IF NOT EXISTS (SELECT * FROM sys.schemas WHERE name = 'default')
BEGIN
EXEC('CREATE SCHEMA [default]');
END ;
drop EXTERNAL DATA SOURCE [ez]
drop EXTERNAL DATA SOURCE [ds]
select * from sys.external_data_sources --- show all existing External Sources
select * from sys.database_scoped_credentials ; -- show all credentials created
-- join of above 2
select eds.data_source_id,eds.name,eds.location ,dsc.name
from sys.external_data_sources as eds left outer join sys.database_scoped_credentials as dsc
on eds.credential_id = dsc.credential_id;
-- Roles
SELECT DISTINCT
pr.principal_id
, pr.name AS [UserName]
, o.[name] AS 'Object'
, pe.class_desc
, pr.type_desc AS [User_or_Role]
, pr.authentication_type_desc AS [Auth_Type]
, pe.state_desc
, pe.permission_name
FROM sys.database_principals AS pr
JOIN sys.database_permissions AS pe ON pe.grantee_principal_id = pr.principal_id
LEFT JOIN sys.objects AS o on (o.object_id = pe.major_id)
-- Roles 2
SELECT r.name role_principal_name
,m.name AS member_principal_name
FROM sys.database_role_members rm
JOIN sys.database_principals r ON rm.role_principal_id = r.principal_id
JOIN sys.database_principals m ON rm.member_principal_id = m.principal_id
WHERE 1=1
grant select on schema.V to "my_group";
revoke select on schema.V to "my_group";
------GEn2
/* View Schema */
SELECT COLUMN_NAME, DATA_TYPE, CHARACTER_MAXIMUM_LENGTH, IS_NULLABLE
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = 'ViewName';
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment