Skip to content

Instantly share code, notes, and snippets.

@davidlu1001
Created March 19, 2025 09:06
Show Gist options
  • Select an option

  • Save davidlu1001/c64dfeb819dce17c16782072c9ee1e56 to your computer and use it in GitHub Desktop.

Select an option

Save davidlu1001/c64dfeb819dce17c16782072c9ee1e56 to your computer and use it in GitHub Desktop.
failoverMonitor.ps1
# FailoverMonitor.ps1
# Automatic Failover Monitoring and Execution Script
# This script monitors Windows Events forwarded from servers and triggers
# DNS failover when error patterns are detected that meet threshold criteria.
#
# Required dependencies:
# - dnsFailover.ps1 in the same directory as this script (or in its parent directory)
# - Windows Event Forwarding properly configured
# - Appropriate permissions for DNS and IIS operations
# Script parameters. -Initialize and -TestError select one-shot modes that are handled in
# the main execution region; when neither is given the script starts the monitor.
[CmdletBinding()]
param (
# Run the initialization path (import DNS failover functions, load config,
# set up the event subscription) and then exit.
[Parameter()]
[switch]$Initialize,
# Run a simulated error event through the processing pipeline and then exit.
[Parameter()]
[switch]$TestError,
# Source server name passed to the -TestError simulation; defaults to the local computer name.
[Parameter()]
[string]$Server = $env:COMPUTERNAME,
# Error type passed to the -TestError simulation.
[Parameter()]
[string]$ErrorType = "COMException",
# JSON configuration file read (and created when absent) by Get-FailoverConfig.
[Parameter()]
[string]$ConfigPath = "$PSScriptRoot\failover_config.json",
# JSON file in which Get-ErrorState / Save-ErrorState keep error and failover state.
[Parameter()]
[string]$StateFilePath = "$PSScriptRoot\error_state.json",
# Log file that Write-Log appends to.
[Parameter()]
[string]$LogFilePath = "$PSScriptRoot\FailoverMonitor.log",
# With -TestError, forwarded as -ForceFailover to the simulation.
[Parameter()]
[switch]$Force
)
# Script version
$script:Version = "1.0.0"
# Set strict mode for better error detection
Set-StrictMode -Version Latest
# Treat errors as terminating so the try/catch blocks in the functions below can handle them
$ErrorActionPreference = 'Stop'
#region Functions
# Writes a timestamped, level-tagged message to the console and to the log file
function Write-Log {
    <#
    .SYNOPSIS
        Formats a message as "[timestamp] [LEVEL] text", echoes it to the console and
        appends it to the file named by the script-level $LogFilePath.
    .PARAMETER Message
        Text to log.
    .PARAMETER Level
        One of INFO, WARNING, ERROR, SUCCESS, DEBUG. DEBUG lines are only echoed to the
        console when $VerbosePreference is 'Continue'; they are always written to the file.
    .PARAMETER NoConsole
        Suppress the console echo and write to the log file only.
    .NOTES
        A failure to write the file is reported with Write-Warning and never thrown,
        so logging cannot abort the caller.
    #>
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [string]$Message,
        [Parameter(Mandatory = $false)]
        [ValidateSet("INFO", "WARNING", "ERROR", "SUCCESS", "DEBUG")]
        [string]$Level = "INFO",
        [Parameter(Mandatory = $false)]
        [switch]$NoConsole
    )
    $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
    $logMessage = "[$timestamp] [$Level] $Message"
    # Write to console with appropriate color if not suppressed
    if (-not $NoConsole) {
        switch ($Level) {
            "INFO" { Write-Host $logMessage -ForegroundColor Cyan }
            "WARNING" { Write-Host $logMessage -ForegroundColor Yellow }
            "ERROR" { Write-Host $logMessage -ForegroundColor Red }
            "SUCCESS" { Write-Host $logMessage -ForegroundColor Green }
            "DEBUG" {
                # Only show debug messages in verbose mode
                if ($VerbosePreference -eq 'Continue') {
                    Write-Host $logMessage -ForegroundColor Gray
                }
            }
        }
    }
    # Append to log file
    try {
        $logDir = Split-Path -Path $LogFilePath -Parent
        # Split-Path returns an empty string for a bare file name (e.g. "monitor.log").
        # Test-Path rejects an empty path, so only probe/create the directory when there
        # is one; a bare name is then resolved against the current location by Add-Content.
        if ($logDir -and -not (Test-Path -Path $logDir -PathType Container)) {
            New-Item -Path $logDir -ItemType Directory -Force | Out-Null
        }
        Add-Content -Path $LogFilePath -Value $logMessage -ErrorAction Stop
    }
    catch {
        Write-Warning "Failed to write to log file: $_"
    }
}
# Archives the log file once it exceeds a size limit and prunes old archives
function Rotate-LogFile {
    <#
    .SYNOPSIS
        Renames the log file to "<base>_<yyyyMMdd-HHmmss><ext>" when it is larger than
        MaxSizeMB, then deletes all but the newest FilesToKeep archives.
    .PARAMETER LogPath
        Path of the active log file.
    .PARAMETER MaxSizeMB
        Size threshold in megabytes above which the file is rotated.
    .PARAMETER FilesToKeep
        Number of rotated archives (newest first by LastWriteTime) to retain.
    .NOTES
        Rotation failures are logged at ERROR level and not rethrown.
    #>
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [string]$LogPath,
        [Parameter(Mandatory = $false)]
        [int]$MaxSizeMB = 10,
        [Parameter(Mandatory = $false)]
        [int]$FilesToKeep = 5
    )
    # Nothing to do when the log does not exist yet or is still below the limit
    if (-not (Test-Path $LogPath)) {
        return
    }
    $logFile = Get-Item $LogPath
    if ($logFile.Length -le ($MaxSizeMB * 1MB)) {
        return
    }
    Write-Log "Log file size limit reached. Rotating logs..." -Level "DEBUG"
    # Take directory and names from the resolved item so a relative LogPath also works
    $directory = $logFile.DirectoryName
    # GetFileNameWithoutExtension strips only the final extension, so a name such as
    # "Failover.Monitor.log" keeps its full base name ("Failover.Monitor")
    $baseName = [System.IO.Path]::GetFileNameWithoutExtension($logFile.Name)
    $extension = if ($logFile.Extension) { $logFile.Extension } else { ".log" }
    $timestamp = Get-Date -Format "yyyyMMdd-HHmmss"
    $archiveName = "$($baseName)_$($timestamp)$extension"
    $newName = Join-Path $directory $archiveName
    try {
        # Rename-Item -NewName expects a name, not a path
        Rename-Item -LiteralPath $logFile.FullName -NewName $archiveName -Force
        Write-Log "Log file rotated to: $newName" -Level "DEBUG"
        # Clean up old log files. The "_" in the filter restricts the match to archives
        # created by this function, so the active log and unrelated files that merely
        # share the prefix are never candidates for deletion.
        $oldLogs = @(Get-ChildItem -Path $directory -Filter "$($baseName)_*$extension" -File |
            Sort-Object LastWriteTime -Descending |
            Select-Object -Skip $FilesToKeep)
        foreach ($old in $oldLogs) {
            Remove-Item -LiteralPath $old.FullName -Force
            Write-Log "Removed old log file: $($old.Name)" -Level "DEBUG"
        }
    }
    catch {
        Write-Log "Failed to rotate log file: $_" -Level "ERROR"
    }
}
# Loads the failover configuration, filling in defaults and creating the file if needed
function Get-FailoverConfig {
    <#
    .SYNOPSIS
        Reads the JSON configuration at the script-level $ConfigPath and returns it as an
        object whose settings are all populated.
    .DESCRIPTION
        When the file exists, any setting that is absent or empty is replaced by its
        default. If one of the required settings was absent, a warning is logged and the
        completed configuration is written back to the file. When the file does not exist,
        a default configuration is written to $ConfigPath and returned.
    .OUTPUTS
        PSCustomObject with dnsServer, lookupZone, dnsName, errorThreshold, windowMinutes,
        cooldownMinutes, processInactiveServerErrors, Env and mailSettings (plus any extra
        properties present in the file).
    .NOTES
        Errors are logged and rethrown.
    #>
    [CmdletBinding()]
    param()
    # Default value for every setting, in the order used for a newly created file
    $defaults = [ordered]@{
        dnsServer                   = "dns1.company.com"
        lookupZone                  = "company.local"
        dnsName                     = "LendingWebServer"
        errorThreshold              = 3
        windowMinutes               = 10
        cooldownMinutes             = 30
        processInactiveServerErrors = $false
        Env                         = "Prod"
        mailSettings                = @{
            smtpServer = "smtp.company.com"
            from       = "[email protected]"
            to         = @("[email protected]")
            enableSsl  = $false
            port       = 25
        }
    }
    $requiredProps = @('dnsServer', 'lookupZone', 'dnsName', 'errorThreshold',
        'windowMinutes', 'cooldownMinutes', 'Env')
    try {
        if (Test-Path $ConfigPath) {
            $config = Get-Content $ConfigPath -Raw | ConvertFrom-Json
            if ($config -isnot [System.Management.Automation.PSCustomObject]) {
                throw "Configuration file '$ConfigPath' does not contain a JSON object"
            }
            # Compare names with -notcontains: it is case-insensitive like PowerShell
            # property access, and (unlike .Contains on a lone string) never degrades
            # into a substring test when the object has a single property.
            $existingNames = @($config.PSObject.Properties | ForEach-Object { $_.Name })
            # @() keeps .Count valid under strict mode for zero or one missing entries
            $missingProps = @($requiredProps | Where-Object { $existingNames -notcontains $_ })
            if ($missingProps.Count -gt 0) {
                Write-Log "Configuration file is missing required properties: $($missingProps -join ', ')" -Level "WARNING"
            }
            # Apply defaults. Absent properties must be added with Add-Member, because
            # reading or assigning a property that does not exist throws; properties that
            # exist but are empty/zero/false are overwritten with the default.
            foreach ($name in $defaults.Keys) {
                if ($existingNames -notcontains $name) {
                    Add-Member -InputObject $config -MemberType NoteProperty -Name $name -Value $defaults[$name]
                }
                elseif (-not $config.$name) {
                    $config.$name = $defaults[$name]
                }
            }
            # Save updated config if there were missing required properties
            if ($missingProps.Count -gt 0) {
                $config | ConvertTo-Json -Depth 5 | Set-Content $ConfigPath
            }
            return $config
        }
        # No configuration file yet: create one from the defaults
        $config = [pscustomobject]$defaults
        $configDir = Split-Path -Path $ConfigPath -Parent
        # $configDir is empty when ConfigPath is a bare file name
        if ($configDir -and -not (Test-Path -Path $configDir -PathType Container)) {
            New-Item -Path $configDir -ItemType Directory -Force | Out-Null
        }
        $config | ConvertTo-Json -Depth 5 | Set-Content $ConfigPath
        Write-Log "Created default configuration file at $ConfigPath" -Level "INFO"
        return $config
    }
    catch {
        Write-Log "Error reading configuration: $_" -Level "ERROR"
        throw
    }
}
# Locates dnsFailover.ps1 next to this script or one directory above it
function Get-DNSFailoverScriptPath {
    <#
    .SYNOPSIS
        Returns the path of dnsFailover.ps1, looking first in $PSScriptRoot and then in
        its parent directory.
    .OUTPUTS
        String path of the first location where the file exists.
    .NOTES
        Logs an error and throws when the file is in neither location.
    #>
    [CmdletBinding()]
    param()
    $fileName = "dnsFailover.ps1"
    # First choice: alongside this script
    $candidate = Join-Path $PSScriptRoot $fileName
    if (Test-Path $candidate) {
        return $candidate
    }
    # Second choice: the parent directory
    $candidate = Join-Path (Split-Path $PSScriptRoot -Parent) $fileName
    if (Test-Path $candidate) {
        return $candidate
    }
    Write-Log "DNS Failover script not found at expected locations. Please specify path in config." -Level "ERROR"
    throw "DNS Failover script not found. Expected at: $candidate"
}
# Loads dnsFailover.ps1 and verifies that it provides Get-ActiveHost
function Import-DNSFailoverFunctions {
    <#
    .SYNOPSIS
        Imports the DNS failover script via Import-Module and checks that the
        Get-ActiveHost command is available afterwards.
    .NOTES
        Any failure (script not found, import error, command missing) is logged and
        rethrown.
        NOTE(review): the import target is a .ps1 file rather than a module; confirm that
        importing it makes Get-ActiveHost visible to the other functions in this script.
    #>
    [CmdletBinding()]
    param()
    try {
        $failoverScript = Get-DNSFailoverScriptPath
        Import-Module $failoverScript -Force -DisableNameChecking
        # The rest of the monitor depends on this one command being present
        $activeHostCommand = Get-Command "Get-ActiveHost" -ErrorAction SilentlyContinue
        if (-not $activeHostCommand) {
            Write-Log "The Get-ActiveHost function was not found in the imported script." -Level "ERROR"
            throw "Required function 'Get-ActiveHost' not found in DNS Failover script"
        }
        Write-Log "Successfully imported functions from DNS Failover script" -Level "DEBUG"
    }
    catch {
        Write-Log "Failed to import DNS Failover functions: $_" -Level "ERROR"
        throw
    }
}
# Reports whether the given server is the one Get-ActiveHost currently returns
function Test-IsActiveServer {
    <#
    .SYNOPSIS
        Compares ServerName with the host returned by Get-ActiveHost, ignoring case and
        everything from the first dot onwards in both names.
    .PARAMETER ServerName
        Server to test (short name or FQDN).
    .PARAMETER Config
        Configuration object supplying dnsName, dnsServer and lookupZone for the lookup.
    .OUTPUTS
        Result of the case-insensitive comparison; $false when the lookup fails (the
        error is logged).
    #>
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [string]$ServerName,
        [Parameter(Mandatory = $true)]
        [PSCustomObject]$Config
    )
    try {
        # Ask the DNS failover script which host is currently active
        $activeHost = Get-ActiveHost -dnsName $Config.dnsName -dnsServer $Config.dnsServer -lookupZone $Config.lookupZone
        # Reduce both names to their first label so FQDN and short name compare equal
        $domainSuffixPattern = '\..*$'
        $activeLabel = $activeHost -replace $domainSuffixPattern, ''
        $serverLabel = $ServerName -replace $domainSuffixPattern, ''
        Write-Log "Active server check - Current: $ServerName, Active: $activeHost" -Level "DEBUG"
        return $serverLabel -ieq $activeLabel
    }
    catch {
        Write-Log "Error checking if server is active: $_" -Level "ERROR"
        # Treat the server as inactive when the lookup fails
        return $false
    }
}
# Loads the persisted error/failover state, creating or repairing it as needed
function Get-ErrorState {
    <#
    .SYNOPSIS
        Returns the state stored in the JSON file at the script-level $StateFilePath.
    .DESCRIPTION
        When the file exists it is parsed and any of the six state properties that are
        absent are added with their default value, so callers can read and assign all of
        them. When the file does not exist a default state is written to $StateFilePath
        and returned. If anything fails, the error is logged and an in-memory default
        state is returned instead.
    .OUTPUTS
        PSCustomObject with errors (array), last_failover, active_server, last_trigger,
        last_from_server and failover_count_24h.
    #>
    [CmdletBinding()]
    param()
    # Factory for a fresh default state; every path returns the same object shape
    $newDefaultState = {
        [pscustomobject][ordered]@{
            errors             = @()
            last_failover      = $null
            active_server      = $null
            last_trigger       = $null
            last_from_server   = $null
            failover_count_24h = 0
        }
    }
    try {
        if (Test-Path $StateFilePath) {
            $state = Get-Content $StateFilePath -Raw | ConvertFrom-Json
            # Backfill every expected property. Assigning to a property that a parsed
            # JSON object lacks fails, so all six must exist before callers update them.
            $defaults = & $newDefaultState
            foreach ($prop in $defaults.PSObject.Properties) {
                if (-not (Get-Member -InputObject $state -Name $prop.Name -MemberType Properties)) {
                    $state | Add-Member -MemberType NoteProperty -Name $prop.Name -Value $prop.Value
                }
            }
            # Normalize errors to an array so callers can always append and enumerate
            if ($null -eq $state.errors) {
                $state.errors = @()
            }
            else {
                $state.errors = @($state.errors)
            }
            return $state
        }
        # Create directory if it doesn't exist ($stateDir is empty for a bare file name)
        $stateDir = Split-Path -Path $StateFilePath -Parent
        if ($stateDir -and -not (Test-Path -Path $stateDir -PathType Container)) {
            New-Item -Path $stateDir -ItemType Directory -Force | Out-Null
        }
        # Initialize new state file
        $state = & $newDefaultState
        $state | ConvertTo-Json -Depth 5 | Set-Content $StateFilePath
        Write-Log "Initialized new error state file" -Level "DEBUG"
        return $state
    }
    catch {
        Write-Log "Error getting error state: $_" -Level "ERROR"
        # Fall back to an in-memory default state so callers can continue
        return (& $newDefaultState)
    }
}
# Persists the error/failover state as JSON
function Save-ErrorState {
    <#
    .SYNOPSIS
        Serializes State to JSON (depth 5) and writes it to the script-level
        $StateFilePath.
    .PARAMETER State
        State object to persist.
    .NOTES
        A failure is logged at ERROR level and not rethrown.
    #>
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [PSCustomObject]$State
    )
    try {
        $serialized = $State | ConvertTo-Json -Depth 5
        Set-Content -Path $StateFilePath -Value $serialized
        Write-Log "Error state saved successfully" -Level "DEBUG"
    }
    catch {
        Write-Log "Failed to save error state: $_" -Level "ERROR"
    }
}
# Records an error occurrence and counts matching errors inside the time window
function Update-ErrorOccurrence {
    <#
    .SYNOPSIS
        Appends an error entry to the persisted state, drops entries older than
        Config.windowMinutes, saves the state and counts the entries relevant to this
        error type and server.
    .PARAMETER ErrorType
        Error category (for example "COMException").
    .PARAMETER SourceServer
        Server that reported the error.
    .PARAMETER Config
        Configuration supplying windowMinutes, errorThreshold and
        processInactiveServerErrors (plus the DNS settings used by Test-IsActiveServer).
    .PARAMETER EventMessage
        Event text; stored truncated to 500 characters followed by "...".
    .PARAMETER EventId
        Event ID stored with the entry.
    .OUTPUTS
        Hashtable with Count, IsActive and RelevantErrors. On failure the error is logged
        and Count = 0, IsActive = $false, RelevantErrors = @() is returned.
    #>
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [string]$ErrorType,
        [Parameter(Mandatory = $true)]
        [string]$SourceServer,
        [Parameter(Mandatory = $true)]
        [PSCustomObject]$Config,
        [Parameter(Mandatory = $false)]
        [string]$EventMessage = "",
        [Parameter(Mandatory = $false)]
        [int]$EventId = 0
    )
    try {
        # Get current state
        $state = Get-ErrorState
        # Current time
        $now = Get-Date
        # Check if server is active
        $isActive = Test-IsActiveServer -ServerName $SourceServer -Config $Config
        # Add new error entry
        $errorEntry = @{
            type             = $ErrorType
            timestamp        = $now.ToString('o')
            server           = $SourceServer
            is_active_server = $isActive
            event_id         = $EventId
            event_message    = if ($EventMessage.Length -gt 500) { $EventMessage.Substring(0, 500) + "..." } else { $EventMessage }
        }
        $state.errors += $errorEntry
        # Clean up old errors outside the time window.
        # The [datetime] cast is used instead of [DateTime]::Parse because it converts
        # with the invariant culture and is a no-op for values that are already DateTime.
        # ConvertFrom-Json on PowerShell 6+ turns ISO 8601 strings into DateTime objects,
        # and round-tripping those through a culture-sensitive Parse can fail or swap
        # day and month on non-US systems.
        $cutoffTime = $now.AddMinutes(-$Config.windowMinutes)
        $state.errors = @($state.errors | Where-Object {
                ([datetime]$_.timestamp) -gt $cutoffTime
            })
        # Save updated state
        Save-ErrorState -State $state
        # Filter errors based on configuration
        $relevantErrors = @($state.errors | Where-Object {
                $_.type -eq $ErrorType -and
                $_.server -eq $SourceServer -and
                ($_.is_active_server -or $Config.processInactiveServerErrors)
            })
        $count = $relevantErrors.Count
        $serverType = if ($isActive) { "active" } else { "inactive" }
        Write-Log "Detected '$ErrorType' error (EventID: $EventId) from $serverType server: $SourceServer. Current count: $count/$($Config.errorThreshold) in $($Config.windowMinutes)min window" -Level "INFO"
        return @{
            Count          = $count
            IsActive       = $isActive
            RelevantErrors = $relevantErrors
        }
    }
    catch {
        Write-Log "Error updating error occurrence: $_" -Level "ERROR"
        return @{ Count = 0; IsActive = $false; RelevantErrors = @() }
    }
}
# Determines whether the post-failover cooldown period is still running
function Test-FailoverCooldown {
    <#
    .SYNOPSIS
        Compares the current time with last_failover (from the persisted state) plus
        Config.cooldownMinutes.
    .PARAMETER Config
        Configuration supplying cooldownMinutes.
    .OUTPUTS
        Hashtable. InCooldown is always present; when it is $true the hashtable also
        carries RemainingMinutes (rounded up), CooldownEnd and LastFailover.
        If the check itself fails, the error is logged and InCooldown = $false is
        returned.
    #>
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [PSCustomObject]$Config
    )
    try {
        $state = Get-ErrorState
        # If no previous failover, no cooldown applies
        if (-not $state.last_failover) {
            return @{ InCooldown = $false }
        }
        # The [datetime] cast converts ISO 8601 strings with the invariant culture and
        # passes through values ConvertFrom-Json already turned into DateTime
        # (PowerShell 6+). [DateTime]::Parse would re-parse such values using the
        # current culture, which can fail or swap day and month.
        $lastFailover = [datetime]$state.last_failover
        $cooldownEnd = $lastFailover.AddMinutes($Config.cooldownMinutes)
        $now = Get-Date
        if ($now -lt $cooldownEnd) {
            $remainingMinutes = [Math]::Ceiling(($cooldownEnd - $now).TotalMinutes)
            return @{
                InCooldown       = $true
                RemainingMinutes = $remainingMinutes
                CooldownEnd      = $cooldownEnd
                LastFailover     = $lastFailover
            }
        }
        return @{ InCooldown = $false }
    }
    catch {
        Write-Log "Error checking failover cooldown: $_" -Level "ERROR"
        return @{ InCooldown = $false } # Default to not in cooldown if error
    }
}
# Records a failover in the persisted state
function Record-Failover {
    <#
    .SYNOPSIS
        Stores the time, trigger, source server and new active server of a failover in
        the persisted state and updates the failover counter.
    .PARAMETER Trigger
        Description of what caused the failover.
    .PARAMETER FromServer
        Server that was active before the failover.
    .PARAMETER ToServer
        Server that is active after the failover.
    .NOTES
        failover_count_24h is reset to 1 when the previous failover happened on a
        different calendar date (or there was none) and incremented otherwise.
        Failures are logged at ERROR level and not rethrown.
    #>
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [string]$Trigger,
        [Parameter(Mandatory = $true)]
        [string]$FromServer,
        [Parameter(Mandatory = $true)]
        [string]$ToServer
    )
    try {
        $state = Get-ErrorState
        $now = Get-Date
        # Check if this is a new day compared to last failover
        $resetDailyCount = $true
        if ($state.last_failover) {
            # The [datetime] cast converts ISO 8601 strings with the invariant culture
            # and passes through values ConvertFrom-Json already turned into DateTime
            # (PowerShell 6+); a culture-sensitive [DateTime]::Parse can fail on those.
            $lastFailover = [datetime]$state.last_failover
            if ($lastFailover.Date -eq $now.Date) {
                $resetDailyCount = $false
            }
        }
        $state.last_failover = $now.ToString('o')
        $state.active_server = $ToServer
        $state.last_trigger = $Trigger
        $state.last_from_server = $FromServer
        # Update daily failover counter
        if ($resetDailyCount) {
            $state.failover_count_24h = 1
        }
        else {
            $state.failover_count_24h = if ($state.failover_count_24h) { $state.failover_count_24h + 1 } else { 1 }
        }
        Save-ErrorState -State $state
        Write-Log "Failover recorded: $FromServer -> $ToServer, Trigger: $Trigger, 24h count: $($state.failover_count_24h)" -Level "INFO"
    }
    catch {
        Write-Log "Error recording failover: $_" -Level "ERROR"
    }
}
# Sends an HTML e-mail describing an executed failover
function Send-FailoverNotification {
    <#
    .SYNOPSIS
        Builds an HTML message about the failover and sends it with Send-MailMessage
        using Config.mailSettings.
    .PARAMETER OldServer
        Server that was active before the failover.
    .PARAMETER NewServer
        Server that is active after the failover.
    .PARAMETER ErrorType
        Error type that triggered the failover.
    .PARAMETER EventId
        Event ID of the triggering event.
    .PARAMETER EventMessage
        Event text; HTML-encoded before being embedded in the message body.
    .PARAMETER Config
        Configuration supplying Env and mailSettings (smtpServer, from, to, and the
        optional port, enableSsl and credential entries).
    .OUTPUTS
        $true when the message was sent, $false when sending failed (the error is logged).
    #>
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [string]$OldServer,
        [Parameter(Mandatory = $true)]
        [string]$NewServer,
        [Parameter(Mandatory = $true)]
        [string]$ErrorType,
        [Parameter(Mandatory = $true)]
        [int]$EventId,
        [Parameter(Mandatory = $false)]
        [string]$EventMessage = "No detailed message available",
        [Parameter(Mandatory = $true)]
        [PSCustomObject]$Config
    )
    # Returns an optional mail setting, or $null when it is not configured.
    # Plain member access would throw under strict mode for settings that are absent
    # (the default configuration has no 'credential' entry). mailSettings may be a
    # hashtable (freshly created defaults) or an object parsed from JSON.
    $getMailSetting = {
        param([string]$Name)
        $settings = $Config.mailSettings
        if ($null -eq $settings) {
            return $null
        }
        if ($settings -is [System.Collections.IDictionary]) {
            if ($settings.Contains($Name)) { return $settings[$Name] }
            return $null
        }
        $property = $settings.PSObject.Properties[$Name]
        if ($null -ne $property) { return $property.Value }
        return $null
    }
    try {
        $subject = "AUTOMATIC FAILOVER EXECUTED: $OldServer -> $NewServer"
        # System.Net.WebUtility is used for encoding because, unlike
        # System.Web.HttpUtility, it does not require loading the System.Web assembly.
        $body = @"
<html>
<body style="font-family: Arial, sans-serif;">
<h2 style="color: #c00;">Automatic Failover Has Been Executed</h2>
<p><strong>Time:</strong> $(Get-Date -Format "yyyy-MM-dd HH:mm:ss")</p>
<p><strong>Trigger:</strong> $ErrorType error threshold reached (Event ID: $EventId)</p>
<p><strong>From Server:</strong> $OldServer</p>
<p><strong>To Server:</strong> $NewServer</p>
<p><strong>Environment:</strong> $($Config.Env)</p>
<h3>Event Details:</h3>
<pre style="background-color: #f0f0f0; padding: 10px; border: 1px solid #ddd;">$([System.Net.WebUtility]::HtmlEncode($EventMessage))</pre>
<p>This is an automated message from the Failover Monitoring System.</p>
</body>
</html>
"@
        $mailParams = @{
            SmtpServer = $Config.mailSettings.smtpServer
            From       = $Config.mailSettings.from
            To         = $Config.mailSettings.to
            Subject    = $subject
            Body       = $body
            BodyAsHtml = $true
        }
        # Add optional email configuration parameters if they exist
        $port = & $getMailSetting 'port'
        if ($port) {
            $mailParams['Port'] = $port
        }
        if ((& $getMailSetting 'enableSsl') -eq $true) {
            $mailParams['UseSsl'] = $true
        }
        $credentialSetting = & $getMailSetting 'credential'
        if ($credentialSetting) {
            # Handle credentials - might need adjustment based on how credentials are stored
            $securePassword = ConvertTo-SecureString $credentialSetting.password -AsPlainText -Force
            $credential = New-Object System.Management.Automation.PSCredential($credentialSetting.username, $securePassword)
            $mailParams['Credential'] = $credential
        }
        # Send the email
        Send-MailMessage @mailParams
        Write-Log "Failover notification email sent successfully" -Level "SUCCESS"
        return $true
    }
    catch {
        Write-Log "Failed to send failover notification email: $_" -Level "ERROR"
        return $false
    }
}
# Runs the DNS failover script, then records and announces the result
function Invoke-DNSFailover {
    <#
    .SYNOPSIS
        Determines the active host, runs dnsFailover.ps1 with the "complete-cycle"
        operation, determines the active host again, records the failover and sends the
        notification e-mail.
    .PARAMETER Config
        Configuration supplying dnsName, dnsServer, lookupZone and Env.
    .PARAMETER ErrorType
        Error type that triggered the failover.
    .PARAMETER EventId
        Event ID of the triggering event.
    .PARAMETER EventMessage
        Event text included in the notification.
    .OUTPUTS
        A single Boolean: $true when the failover script ran and the failover was
        recorded, $false when the active host could not be determined, the script
        reported a non-zero exit code, or an error occurred (logged).
    .NOTES
        An unchanged active host after the run is logged as a warning but still recorded
        and announced.
    #>
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [PSCustomObject]$Config,
        [Parameter(Mandatory = $true)]
        [string]$ErrorType,
        [Parameter(Mandatory = $true)]
        [int]$EventId,
        [Parameter(Mandatory = $false)]
        [string]$EventMessage = "No detailed message available"
    )
    try {
        # Get current active host before failover
        $params = @{
            dnsName    = $Config.dnsName
            dnsServer  = $Config.dnsServer
            lookupZone = $Config.lookupZone
        }
        $activeHost = Get-ActiveHost @params
        Write-Log "Current active host before failover: $activeHost" -Level "INFO"
        if (-not $activeHost) {
            Write-Log "Unable to determine current active host. Aborting failover." -Level "ERROR"
            return $false
        }
        # Get DNS Failover script path
        $dnsFailoverScript = Get-DNSFailoverScriptPath
        # Named parameters must be splatted from a hashtable: splatting an array passes
        # every element (including "-Env") as a positional value.
        $scriptParams = @{
            Env        = $Config.Env
            Ops        = "complete-cycle"
            dnsServer  = $Config.dnsServer
            lookupZone = $Config.lookupZone
        }
        $scriptParamText = (@('Env', 'Ops', 'dnsServer', 'lookupZone') |
            ForEach-Object { "-$_ $($scriptParams[$_])" }) -join ' '
        # Execute the failover script
        Write-Log "Executing DNS failover with parameters: $scriptParamText" -Level "INFO"
        # $LASTEXITCODE is only updated when the script calls exit or runs a native
        # program; reset it so a stale value from an earlier command (or an unset
        # variable under strict mode) cannot be misread as this run's result.
        $global:LASTEXITCODE = 0
        # Send the script's output to the host so it stays visible but does not become
        # part of this function's return value.
        & $dnsFailoverScript @scriptParams | Out-Host
        if ($LASTEXITCODE -ne 0) {
            Write-Log "DNS failover script execution failed with exit code: $LASTEXITCODE" -Level "ERROR"
            return $false
        }
        # Get new active host after failover
        $newActiveHost = Get-ActiveHost @params
        Write-Log "New active host after failover: $newActiveHost" -Level "SUCCESS"
        # If the host didn't change, something went wrong
        if ($newActiveHost -eq $activeHost) {
            Write-Log "Failover appears to have failed - active host did not change" -Level "WARNING"
            # Continue anyway to record the attempt and send notification
        }
        # Record the failover
        Record-Failover -Trigger "$ErrorType threshold reached (EventID: $EventId)" -FromServer $activeHost -ToServer $newActiveHost
        # Send notification; its Boolean result is discarded so that it is not emitted
        # alongside this function's own return value (the outcome is already logged).
        $null = Send-FailoverNotification -OldServer $activeHost -NewServer $newActiveHost -ErrorType $ErrorType -EventId $EventId -EventMessage $EventMessage -Config $Config
        Write-Log "Failover process completed: $activeHost -> $newActiveHost" -Level "SUCCESS"
        return $true
    }
    catch {
        Write-Log "Error executing DNS failover: $_" -Level "ERROR"
        return $false
    }
}
# Handles one error event and triggers a failover when the rules call for it
function Process-ErrorEvent {
    <#
    .SYNOPSIS
        Applies the cooldown, active-server and threshold rules to an error event and
        runs Invoke-DNSFailover when they are satisfied.
    .PARAMETER ErrorType
        Error category of the event.
    .PARAMETER SourceServer
        Server that reported the event.
    .PARAMETER EventMessage
        Event text.
    .PARAMETER EventId
        Event ID.
    .PARAMETER ForceFailover
        Bypass the cooldown, inactive-server and threshold checks.
    .OUTPUTS
        The result of Invoke-DNSFailover when a failover was attempted; otherwise $false
        (also when an error occurred, which is logged).
    #>
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [string]$ErrorType,
        [Parameter(Mandatory = $true)]
        [string]$SourceServer,
        [Parameter(Mandatory = $false)]
        [string]$EventMessage = "No message provided",
        [Parameter(Mandatory = $false)]
        [int]$EventId = 0,
        [Parameter(Mandatory = $false)]
        [switch]$ForceFailover
    )
    try {
        $config = Get-FailoverConfig

        # Rule 1: no failover while cooling down (the event is not recorded either)
        $cooldown = Test-FailoverCooldown -Config $config
        if ($cooldown.InCooldown -and -not $ForceFailover) {
            Write-Log "System is in cooldown period. $($cooldown.RemainingMinutes) minutes remaining until next failover can be triggered" -Level "WARNING"
            return $false
        }

        # Record the occurrence and obtain the current count for this type/server
        $occurrence = Update-ErrorOccurrence -ErrorType $ErrorType -SourceServer $SourceServer -Config $config -EventMessage $EventMessage -EventId $EventId

        # Rule 2: errors from an inactive server are ignored unless configured otherwise
        if (-not ($occurrence.IsActive -or $config.processInactiveServerErrors -or $ForceFailover)) {
            Write-Log "Ignoring error from inactive server $SourceServer (configure 'processInactiveServerErrors=true' to change this behavior)" -Level "INFO"
            return $false
        }

        # Rule 3: the threshold must be reached (or the failover forced)
        if (-not ($occurrence.Count -ge $config.errorThreshold -or $ForceFailover)) {
            Write-Log "Error threshold not yet reached: $($occurrence.Count)/$($config.errorThreshold) errors in $($config.windowMinutes) minute window" -Level "INFO"
            return $false
        }

        $reason = "threshold reached"
        if ($ForceFailover) {
            $reason = "forced failover"
        }
        Write-Log "Triggering failover due to $reason : $($occurrence.Count) errors of type $ErrorType detected in $($config.windowMinutes) minute window" -Level "WARNING"

        $outcome = Invoke-DNSFailover -Config $config -ErrorType $ErrorType -EventId $EventId -EventMessage $EventMessage
        if ($outcome) {
            Write-Log "Failover executed successfully" -Level "SUCCESS"
        }
        else {
            Write-Log "Failover execution failed" -Level "ERROR"
        }
        return $outcome
    }
    catch {
        Write-Log "Error processing error event: $_" -Level "ERROR"
        return $false
    }
}
# Configures the Windows Event Collector and the COMException subscription
function Initialize-EventSubscription {
    <#
    .SYNOPSIS
        Ensures the Wecsvc service is running, runs "wecutil qc", writes the subscription
        XML to event_subscription.xml in $PSScriptRoot, creates or updates the
        "ApplicationErrorMonitoring" subscription and, when the configuration has a
        monitoredServers list, passes that list to wecutil.
    .OUTPUTS
        A single Boolean: $true on success; $false when the Wecsvc service is missing,
        creating/updating the subscription fails, or another error occurs (logged).
    .NOTES
        wecutil output is sent to the host rather than the pipeline so that it cannot
        become part of the return value. Non-zero exit codes from the optional steps
        ("wecutil qc" and the monitored-server update) are logged as warnings.
    #>
    [CmdletBinding()]
    param()
    # Runs wecutil with the given arguments, shows its output on the host and returns
    # its exit code.
    $invokeWecutil = {
        param([string[]]$WecutilArguments)
        $global:LASTEXITCODE = 0
        & wecutil @WecutilArguments | Out-Host
        return $LASTEXITCODE
    }
    try {
        # Ensure the Event Collector service is running
        $service = Get-Service -Name Wecsvc -ErrorAction SilentlyContinue
        if (-not $service) {
            Write-Log "Windows Event Collector service (Wecsvc) not found" -Level "ERROR"
            return $false
        }
        if ($service.Status -ne 'Running') {
            Write-Log "Starting Windows Event Collector service" -Level "INFO"
            Start-Service -Name Wecsvc
        }
        # Configure event collector
        Write-Log "Configuring Windows Event Collector" -Level "INFO"
        $exitCode = & $invokeWecutil -WecutilArguments 'qc', '-quiet'
        if ($exitCode -ne 0) {
            Write-Log "wecutil qc returned exit code $exitCode" -Level "WARNING"
        }
        # Create subscription XML
        $subscriptionName = "ApplicationErrorMonitoring"
        $subscriptionXml = @"
<Subscription xmlns="http://schemas.microsoft.com/2006/03/windows/events/subscription">
<SubscriptionId>$subscriptionName</SubscriptionId>
<SubscriptionType>SourceInitiated</SubscriptionType>
<Description>Monitor for COMException errors</Description>
<Enabled>true</Enabled>
<Uri>http://schemas.microsoft.com/wbem/wsman/1/windows/EventLog</Uri>
<ConfigurationMode>Custom</ConfigurationMode>
<Delivery Mode="Push">
<Batching>
<MaxItems>1</MaxItems>
<MaxLatencyTime>1000</MaxLatencyTime>
</Batching>
<PushSettings>
<Heartbeat Interval="900000"/>
</PushSettings>
</Delivery>
<Query>
<![CDATA[
<QueryList>
<Query Id="0">
<Select Path="Application">*[System[(Level=1 or Level=2 or Level=3) and EventData[Data and (Data contains 'COMException')]]]</Select>
</Query>
</QueryList>
]]>
</Query>
<ReadExistingEvents>false</ReadExistingEvents>
<TransportName>HTTP</TransportName>
<ContentFormat>RenderedText</ContentFormat>
<Locale Language="en-US"/>
<LogFile>ForwardedEvents</LogFile>
<AllowedSourceDomainComputers>O:NSG:NSD:(A;;GA;;;DC)(A;;GA;;;NS)</AllowedSourceDomainComputers>
</Subscription>
"@
        $subscriptionPath = Join-Path $PSScriptRoot "event_subscription.xml"
        $subscriptionXml | Out-File -FilePath $subscriptionPath -Encoding utf8
        # Create or update the subscription
        $existingSubscription = wecutil es | Where-Object { $_ -eq $subscriptionName }
        if ($existingSubscription) {
            Write-Log "Updating existing subscription: $subscriptionName" -Level "INFO"
            $exitCode = & $invokeWecutil -WecutilArguments 'ss', $subscriptionName, "/c:$subscriptionPath"
        }
        else {
            Write-Log "Creating new subscription: $subscriptionName" -Level "INFO"
            $exitCode = & $invokeWecutil -WecutilArguments 'cs', $subscriptionPath
        }
        if ($exitCode -ne 0) {
            throw "wecutil failed to create or update subscription '$subscriptionName' (exit code $exitCode)"
        }
        # Get configuration to determine servers to monitor
        $config = Get-FailoverConfig
        # monitoredServers is optional. Look it up without plain member access, which
        # throws under strict mode when the setting is absent; the configuration may be
        # a hashtable or an object parsed from JSON.
        $monitoredServers = @()
        if ($config -is [System.Collections.IDictionary]) {
            if ($config.Contains('monitoredServers')) {
                $monitoredServers = @($config['monitoredServers'])
            }
        }
        elseif ($null -ne $config.PSObject.Properties['monitoredServers']) {
            $monitoredServers = @($config.monitoredServers)
        }
        $monitoredServers = @($monitoredServers | Where-Object { $_ })
        if ($monitoredServers.Count -gt 0) {
            Write-Log "Adding monitored servers to subscription" -Level "INFO"
            # Uniquely named temp file for the computer list, removed even on failure
            $computerListPath = [System.IO.Path]::GetTempFileName()
            try {
                $monitoredServers | Out-File -FilePath $computerListPath -Encoding utf8
                # Add computers to subscription
                $exitCode = & $invokeWecutil -WecutilArguments 'ss', $subscriptionName, "/cf:$computerListPath"
                if ($exitCode -ne 0) {
                    Write-Log "wecutil could not apply the monitored server list (exit code $exitCode)" -Level "WARNING"
                }
            }
            finally {
                # Clean up temp file
                Remove-Item -LiteralPath $computerListPath -Force -ErrorAction SilentlyContinue
            }
        }
        else {
            Write-Log "No monitored servers defined in configuration. You need to add servers manually or update config." -Level "WARNING"
        }
        Write-Log "Event subscription setup complete" -Level "SUCCESS"
        return $true
    }
    catch {
        Write-Log "Error initializing event subscription: $_" -Level "ERROR"
        return $false
    }
}
# Starts watching the ForwardedEvents log for COMException events
function Start-EventWatcher {
    <#
    .SYNOPSIS
        Creates an EventLogWatcher on the ForwardedEvents log, stores it in
        $script:EventWatcher and registers a handler that passes every event whose text
        contains "COMException" to Process-ErrorEvent.
    .OUTPUTS
        $true when the watcher was started, $false on error (logged).
    .NOTES
        The handler receives the log file path and the Process-ErrorEvent script block
        through -MessageData. The action block runs outside this script's scope, where
        $using: variables are not supported.
        NOTE(review): functions defined in script scope are expected not to resolve by
        name from the action block; invoking the captured script block is intended to
        work around that - confirm on the target host.
    #>
    [CmdletBinding()]
    param()
    try {
        # Create event query for ForwardedEvents log. The second constructor argument is
        # a PathType value stating that the first argument is a log name.
        $query = New-Object -TypeName System.Diagnostics.Eventing.Reader.EventLogQuery -ArgumentList @(
            "ForwardedEvents",
            [System.Diagnostics.Eventing.Reader.PathType]::LogName
        )
        # Create watcher
        $script:EventWatcher = New-Object -TypeName System.Diagnostics.Eventing.Reader.EventLogWatcher -ArgumentList $query
        # Data handed to the event handler (available there as $Event.MessageData)
        $handlerData = @{
            LogFilePath = $LogFilePath
            Handler     = ${function:Process-ErrorEvent}
        }
        # Register event handler
        Register-ObjectEvent -InputObject $script:EventWatcher -EventName EventRecordWritten -MessageData $handlerData -Action {
            # $Event and $EventArgs are automatic variables of the action block; use a
            # separate name for the record so $Event is not overwritten.
            $record = $EventArgs.EventRecord
            # Only process if we have a valid event
            if ($record) {
                try {
                    # EventRecord has no Message property (Get-WinEvent adds one to its
                    # output); the rendered text comes from FormatDescription(). Fall
                    # back to the event XML when no description is available.
                    $recordText = $null
                    try { $recordText = $record.FormatDescription() } catch { $recordText = $null }
                    if (-not $recordText) {
                        $recordText = $record.ToXml()
                    }
                    # Check for COMException pattern
                    if ($recordText -like "*COMException*") {
                        # Call our error processing function
                        & $Event.MessageData.Handler -ErrorType "COMException" -SourceServer $record.MachineName -EventMessage $recordText -EventId $record.Id
                    }
                }
                catch {
                    # Log the error but don't crash the event handler
                    $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
                    $errorMessage = "[$timestamp] [ERROR] Error in event handler: $_"
                    Add-Content -Path $Event.MessageData.LogFilePath -Value $errorMessage
                }
            }
        } | Out-Null
        # Enable the watcher
        $script:EventWatcher.Enabled = $true
        Write-Log "Event watcher started. Now monitoring for COMException events." -Level "SUCCESS"
        return $true
    }
    catch {
        Write-Log "Error starting event watcher: $_" -Level "ERROR"
        return $false
    }
}
# Stops and disposes the event watcher, if one was started
function Stop-EventWatcher {
    <#
    .SYNOPSIS
        Disables the watcher held in $script:EventWatcher, unregisters the event
        subscriptions attached to it, disposes it and clears the variable.
    .NOTES
        Does nothing when no watcher exists. Errors are logged and not rethrown.
    #>
    [CmdletBinding()]
    param()
    # Under strict mode, reading $script:EventWatcher before it was ever assigned
    # throws, so look the variable up without dereferencing it.
    $watcherVariable = Get-Variable -Name EventWatcher -Scope Script -ErrorAction SilentlyContinue
    if (-not $watcherVariable -or -not $watcherVariable.Value) {
        return
    }
    $watcher = $watcherVariable.Value
    try {
        $watcher.Enabled = $false
        # Remove the subscription(s) created by Register-ObjectEvent for this watcher
        Get-EventSubscriber |
            Where-Object { [object]::ReferenceEquals($_.SourceObject, $watcher) } |
            Unregister-Event
        $watcher.Dispose()
        $script:EventWatcher = $null
        Write-Log "Event watcher stopped" -Level "INFO"
    }
    catch {
        Write-Log "Error stopping event watcher: $_" -Level "ERROR"
    }
}
# Feeds a synthetic error event through Process-ErrorEvent
function Test-ErrorSimulation {
    <#
    .SYNOPSIS
        Builds a canned COMException-style message and processes it as event ID 9999 for
        the given server and error type.
    .PARAMETER Server
        Server name to report as the event source.
    .PARAMETER ErrorType
        Error type to simulate.
    .PARAMETER ForceFailover
        Forwarded to Process-ErrorEvent.
    .OUTPUTS
        The result of Process-ErrorEvent, or $false when the simulation itself fails
        (logged).
    #>
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true)]
        [string]$Server,
        [Parameter(Mandatory = $true)]
        [string]$ErrorType,
        [Parameter(Mandatory = $false)]
        [switch]$ForceFailover
    )
    try {
        Write-Log "Running test error simulation for $ErrorType on server $Server" -Level "INFO"
        # Synthetic event text
        $simulatedMessage = @"
Test $ErrorType error generated for testing purposes.
Process: TestProcess.exe
Error Details: System.Runtime.InteropServices.COMException (0x80004005): Error HRESULT E_FAIL has been returned from a call to a COM component.
at System.Runtime.InteropServices.Marshal.ThrowExceptionForHR(Int32 errorCode)
at TestComponent.TestMethod()
"@
        $triggered = Process-ErrorEvent -ErrorType $ErrorType -SourceServer $Server -EventMessage $simulatedMessage -EventId 9999 -ForceFailover:$ForceFailover
        # Report the outcome
        $report = @{ Text = "Test error simulation did not trigger failover"; Level = "INFO" }
        if ($triggered) {
            $report = @{ Text = "Test error simulation successfully triggered failover"; Level = "SUCCESS" }
        }
        Write-Log $report.Text -Level $report.Level
        return $triggered
    }
    catch {
        Write-Log "Error during test error simulation: $_" -Level "ERROR"
        return $false
    }
}
# Main entry point: sets everything up and keeps the monitor running
function Start-FailoverMonitor {
    <#
    .SYNOPSIS
        Rotates the log, imports the DNS failover functions, loads the configuration,
        sets up the event subscription and starts the event watcher.
    .PARAMETER AsService
        Return once the watcher is running instead of blocking. Without it the function
        loops (sleeping 60 seconds and rotating the log each time) until interrupted,
        then stops the watcher.
    .NOTES
        A failed subscription setup is logged as a warning and the monitor continues.
        A watcher that cannot be started is treated as a critical error. Critical
        errors are logged, the watcher is stopped and the error is rethrown.
    #>
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $false)]
        [switch]$AsService
    )
    try {
        # Rotate log file if needed
        Rotate-LogFile -LogPath $LogFilePath
        Write-Log "=======================================================" -Level "INFO"
        Write-Log "Starting Failover Monitor v$script:Version" -Level "INFO"
        Write-Log "=======================================================" -Level "INFO"
        # Import DNS Failover functions
        Import-DNSFailoverFunctions
        # Load the configuration up front so a missing file is created and an invalid
        # one fails here rather than on the first event
        $null = Get-FailoverConfig
        Write-Log "Configuration loaded" -Level "DEBUG"
        # Capture the setup results instead of letting them fall into this function's
        # output. Select-Object -Last 1 takes the Boolean result even if other output
        # precedes it.
        $subscriptionReady = Initialize-EventSubscription | Select-Object -Last 1
        if (-not $subscriptionReady) {
            Write-Log "Event subscription setup did not complete; continuing with whatever subscription already exists" -Level "WARNING"
        }
        $watcherStarted = Start-EventWatcher | Select-Object -Last 1
        if (-not $watcherStarted) {
            # Without a watcher no event would ever be processed
            throw "Event watcher could not be started"
        }
        if (-not $AsService) {
            # If running interactively, keep script alive
            Write-Log "Failover Monitor is now running. Press Ctrl+C to stop." -Level "INFO"
            try {
                while ($true) {
                    Start-Sleep -Seconds 60
                    # Rotate log file periodically
                    Rotate-LogFile -LogPath $LogFilePath
                }
            }
            finally {
                Stop-EventWatcher
                Write-Log "Failover Monitor stopped" -Level "INFO"
            }
        }
        else {
            # When running as a service, just return - the event registration keeps it active
            Write-Log "Failover Monitor service is now running" -Level "INFO"
            return
        }
    }
    catch {
        # Keep the original error so that a failure during cleanup cannot replace it
        $failure = $_
        Write-Log "Critical error in Failover Monitor: $failure" -Level "ERROR"
        try {
            Stop-EventWatcher
        }
        catch {
            Write-Log "Error while stopping event watcher during cleanup: $_" -Level "ERROR"
        }
        throw $failure
    }
}
#endregion Functions
#region Main Execution
# Dispatch on the script switches. The -Initialize and -TestError modes always end the
# script with an exit code (1 on failure, 0 otherwise); with neither switch the monitor
# is started.
if ($Initialize) {
    Write-Log "Initializing Failover Monitor" -Level "INFO"
    try {
        # Import DNS Failover functions
        Import-DNSFailoverFunctions
        # Initialize configuration
        $config = Get-FailoverConfig
        # Initialize event subscription
        $initialized = Initialize-EventSubscription
        if (-not $initialized) {
            Write-Log "Initialization completed with warnings" -Level "WARNING"
        }
        else {
            Write-Log "Initialization completed successfully" -Level "SUCCESS"
        }
    }
    catch {
        Write-Log "Initialization failed: $_" -Level "ERROR"
        exit 1
    }
    exit 0
}

if ($TestError) {
    Write-Log "Running error simulation test" -Level "INFO"
    try {
        # Import DNS Failover functions
        Import-DNSFailoverFunctions
        # Run test
        $failoverTriggered = Test-ErrorSimulation -Server $Server -ErrorType $ErrorType -ForceFailover:$Force
        if (-not $failoverTriggered) {
            Write-Log "Test completed but did not trigger failover" -Level "INFO"
        }
        else {
            Write-Log "Test completed successfully and triggered failover" -Level "SUCCESS"
        }
    }
    catch {
        Write-Log "Test failed: $_" -Level "ERROR"
        exit 1
    }
    exit 0
}

# Default mode: start the monitor
Start-FailoverMonitor
#endregion Main Execution
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment