Skip to content

Instantly share code, notes, and snippets.

@EitanBlumin
Last active April 26, 2022 13:31
Show Gist options
  • Save EitanBlumin/42a1ed597ccb9269d07cfcc9560958dc to your computer and use it in GitHub Desktop.
Save EitanBlumin/42a1ed597ccb9269d07cfcc9560958dc to your computer and use it in GitHub Desktop.
Example job command to implement self-monitoring for SQL Sentry monitoring service hearbeats
-- Run this in the right SQL Sentry database
--USE [SentryOne];
--USE [SQLSentry];
GO
IF OBJECT_ID('[dbo].[heartbeat_log]') IS NULL
BEGIN
CREATE TABLE [dbo].[heartbeat_log](
[servername] [nvarchar](300) CONSTRAINT PK_Heartbeat_Log PRIMARY KEY CLUSTERED WITH(IGNORE_DUP_KEY=ON,DATA_COMPRESSION=PAGE),
[heartbeatdate] [datetime] NULL,
[ActualHeartbeatDate] [datetime] NULL
);
END
GO
DECLARE
@HeartbeatThresholdMinutes INT = 5,
@HeartbeatRepeatAlertThresholdHours INT = 1,
@GlobalObjID UNIQUEIDENTIFIER
SELECT @GlobalObjID = ObjectID FROM dbo.vwObjects_Global;
DECLARE @SubjectTitle NVARCHAR(255), @BodyText NVARCHAR(4000);
SET @SubjectTitle = N'Monitoring Service: Heartbeat Failure'
-- Delete any irrelevant heartbeat failures from log
DELETE HL
-- optionally use this output to implement an "auto-resolve" logic:
-- OUTPUT deleted.servername INTO @RestoredHeartbeats
FROM dbo.heartbeat_log AS HL
INNER JOIN dbo.ManagementEngine AS E
ON HL.servername = E.servername
WHERE E.HeartbeatDateTime > HL.HeartbeatDate
-- Get all monitoring servers with outdated heartbeats and are not snoozed
DECLARE @site NVARCHAR(200), @servername NVARCHAR(200), @HeartbeatDateTime datetime
, @text NVARCHAR(200), @title NVARCHAR(200), @HeartbeatDateTimeLocal datetime;
DECLARE serverlist CURSOR
LOCAL FAST_FORWARD
FOR
SELECT T.[Name],E.[servername], ISNULL(E.HeartbeatDateTime, E.LastInitializationDateTime)
FROM dbo.ManagementEngine E
INNER JOIN [dbo].[Site] T ON T.ID = E.SiteID
WHERE ISNULL(E.HeartbeatDateTime, E.LastInitializationDateTime) < DATEADD(MINUTE, -@HeartbeatThresholdMinutes, GETUTCDATE())
AND NOT EXISTS (SELECT st.ObjectID FROM dbo.SnoozeStatus st WHERE st.ObjectID IN (E.ObjectID,T.ObjectID,@GlobalObjID) )
OPEN serverlist
WHILE 1=1
BEGIN
FETCH NEXT FROM serverlist INTO @site, @servername, @HeartbeatDateTime
IF @@FETCH_STATUS <> 0 BREAK;
SET @HeartbeatDateTimeLocal = CONVERT(datetime, @HeartbeatDateTime AT TIME ZONE 'UTC' AT TIME ZONE 'Israel Standard Time' )
SET @title = @site + ': '+ QUOTENAME(@servername) + ' ' + @SubjectTitle
SET @text = N'Monitor server <b>' + @servername + N'</b> has lost connection to the repository.<br/>Last heartbeat was at <b>' + convert(nvarchar(200),@HeartbeatDateTimeLocal, 121) + N' (IST)</b>'
SET @BodyText = N'[Message]: ' + @text
+ N'<br/><br/>[Start Time (Local)]: ' + CONVERT(nvarchar(25), @HeartbeatDateTimeLocal, 121)
+ N'<br/>[Start Time (UTC)]: ' + CONVERT(nvarchar(25), @HeartbeatDateTime, 121)
+ N'<br/>[Duration]: ' + ISNULL(NULLIF(CONVERT(varchar(100), DATEDIFF(dd,0, GETUTCDATE()-@HeartbeatDateTime)), 0) + ' day(s), ', '')
+ CONVERT(varchar(100), GETUTCDATE()-@HeartbeatDateTime, 114)
+ N'<br/>----------------------------------------------------------------------'
+ N'<br/>[Timestamp (Local)]: ' + CONVERT(nvarchar(25), GETDATE(), 121)
+ N'<br/>[Timestamp (UTC)]: ' + CONVERT(nvarchar(25), GETUTCDATE(), 121)
+ N'<br/>[Generated By]: ' + @@SERVERNAME
+ N'<br/>[Monitor Type]: SQLSentryHeartbeatCheck<br/>[Condition]: ' + @SubjectTitle
;
WITH trgt AS (SELECT * FROM dbo.heartbeat_log WHERE servername = @servername)
MERGE INTO trgt
USING (SELECT * FROM (VALUES(@servername, GETUTCDATE())) AS v(servername, TimeNow)) AS src
ON trgt.servername = src.servername
WHEN NOT MATCHED BY TARGET THEN
INSERT (servername, heartbeatdate)
VALUES (servername, TimeNow)
WHEN MATCHED AND src.TimeNow > dateadd(hour, @HeartbeatRepeatAlertThresholdHours, trgt.heartbeatdate) THEN
UPDATE SET heartbeatdate = src.TimeNow
;
IF @@ROWCOUNT > 0
BEGIN
PRINT N'Heartbeat failure for ' + QUOTENAME(@servername)
-- Get additional metadata about the server if it's also a SQL Server
DECLARE @SQLVersion NVARCHAR(4000), @SQLEdition NVARCHAR(4000)
SELECT
@SQLVersion = CASE
WHEN MajorVersionNumber = 10 AND MinorVersionNumber = 50 THEN '2008R2'
WHEN EngineEdition IN (5,6) THEN 'sql_azure'
ELSE
CASE MajorVersionNumber
WHEN 8 THEN '2000'
WHEN 9 THEN '2005'
WHEN 10 THEN '2008'
WHEN 11 THEN '2012'
WHEN 12 THEN '2014'
WHEN 13 THEN '2016'
WHEN 14 THEN '2017'
WHEN 15 THEN '2019'
WHEN 16 THEN '2022'
ELSE ''
END
END
,@SQLEdition = CASE [EngineEdition] -- https://docs.microsoft.com/en-us/sql/t-sql/functions/serverproperty-transact-sql
WHEN 2 THEN 'standard' -- also web and BI
WHEN 3 THEN 'enterprise' -- also datacenter, developer and evaluation
WHEN 4 THEN 'express'
WHEN 5 THEN 'azure_sql_database'
WHEN 6 THEN 'azure_sql_data_warehouse'
WHEN 8 THEN 'azure_managed_instance'
ELSE ''
END
FROM [dbo].[vwSqlServer]
WHERE [ObjectName] = @servername
/* ---- TODO: Add your alert logic below this line ---- */
/*-- Example 1: Sending an e-mail using DBMail:
EXEC msdb..sp_send_dbmail
@recipients = '[email protected]'
, @subject = @title
, @body = @BodyText
, @body_format = 'HTML'
, @importance = 'high'
*/
/*-- Example 2: Throwing an error to fail the job:
--RAISERROR(N'%s', 16, 1, @BodyText);
*/
/*-- Example 3: Whatever else:
EXECUTE SendSNMPAlertCLR
@SourceServer = @servername,
@Subject = @title,
@Body = @BodyText,
@Severity = 3,
@SQLVersion = @SQLVersion,
@SQLEdition = @SQLEdition,
@Condition = @SubjectTitle,
@Site = @site
*/
/* ---- TODO: Add your alert logic above this line ---- */
END
END
CLOSE serverlist
DEALLOCATE serverlist
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment