Created
April 30, 2021 17:07
-
-
Save TJM/d5d0f71bd5c23d6ad874b6cd68333151 to your computer and use it in GitHub Desktop.
An idea for cluster patching (one at a time) using pe_patch. The `patching.pp` is a copy of pe_patch::group_patching, except for a modification to take $targets directly. The `cluster_patching.pp` is my attempt at a "one at a time" wrapper.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Wrapper for the patchy::patching plan that runs 'patching' one node at a
# time, rather than on all nodes at once.
#
# The targets are resolved with get_targets() and patchy::patching is invoked
# once per target, in order. Every parameter other than $targets is forwarded
# unchanged to patchy::patching.
#
# @param targets
#   The targets to patch, one at a time.
# @param reboot
#   Forwarded to patchy::patching (reboot behaviour of the patch task).
# @param yum_params
#   Forwarded to patchy::patching.
# @param dpkg_params
#   Forwarded to patchy::patching.
# @param zypper_params
#   Forwarded to patchy::patching.
# @param patch_task_timeout
#   Forwarded to patchy::patching.
# @param health_check_runinterval
#   Forwarded to patchy::patching.
# @param reboot_wait_time
#   Forwarded to patchy::patching.
# @param security_only
#   Forwarded to patchy::patching.
# @param run_health_check
#   Forwarded to patchy::patching.
# @param clean_cache
#   Forwarded to patchy::patching.
# @param health_check_noop
#   Forwarded to patchy::patching.
# @param health_check_use_cached_catalog
#   Forwarded to patchy::patching.
# @param health_check_service_enabled
#   Forwarded to patchy::patching.
# @param health_check_service_running
#   Forwarded to patchy::patching.
# @param post_reboot_scriptpath
#   Forwarded to patchy::patching.
#
# @return [Hash] The result of patchy::patching for each target, keyed by
#   target name.
plan patchy::cluster_patching (
  TargetSpec                                            $targets,
  Optional[Enum['always', 'never', 'patched', 'smart']] $reboot                          = 'patched',
  Optional[String]                                      $yum_params                      = undef,
  Optional[String]                                      $dpkg_params                     = undef,
  Optional[String]                                      $zypper_params                   = undef,
  Optional[Integer]                                     $patch_task_timeout              = 3600,
  Optional[Integer]                                     $health_check_runinterval        = 1800,
  Optional[Integer]                                     $reboot_wait_time                = 600,
  Optional[Boolean]                                     $security_only                   = false,
  Optional[Boolean]                                     $run_health_check                = true,
  Optional[Boolean]                                     $clean_cache                     = false,
  Optional[Boolean]                                     $health_check_noop               = false,
  Optional[Boolean]                                     $health_check_use_cached_catalog = false,
  Optional[Boolean]                                     $health_check_service_enabled    = true,
  Optional[Boolean]                                     $health_check_service_running    = true,
  Optional[Pe_patch::Absolutepath]                      $post_reboot_scriptpath          = undef,
){
  # Get Target List
  $target_array = get_targets($targets)

  # Recreate Argument List so each per-target run receives exactly what the
  # caller passed to this wrapper.
  $args = {
    'reboot'                          => $reboot,
    'yum_params'                      => $yum_params,
    'dpkg_params'                     => $dpkg_params,
    # Forward the caller's value; this was previously hard-coded to undef,
    # which silently discarded any zypper parameters.
    'zypper_params'                   => $zypper_params,
    'patch_task_timeout'              => $patch_task_timeout,
    'health_check_runinterval'        => $health_check_runinterval,
    'reboot_wait_time'                => $reboot_wait_time,
    'security_only'                   => $security_only,
    'run_health_check'                => $run_health_check,
    'clean_cache'                     => $clean_cache,
    'health_check_noop'               => $health_check_noop,
    'health_check_use_cached_catalog' => $health_check_use_cached_catalog,
    'health_check_service_enabled'    => $health_check_service_enabled,
    'health_check_service_running'    => $health_check_service_running,
    'post_reboot_scriptpath'          => $post_reboot_scriptpath,
  }

  # Patch the targets sequentially, accumulating each per-target plan result
  # in a hash keyed by target name.
  $results = $target_array.reduce({}) |$memo, $target| {
    $result = run_plan('patchy::patching', $target, $args)
    $memo + { $target.name => $result }
  }

  # Output the results
  return($results)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is a copy of pe_patch::group_patching, but adapted to take $targets
# instead of a patch_group.
#
# Stages, each operating on the targets that passed the previous one:
#   1. Optional health check (pe_patch::agent_health, then a puppet run).
#   2. Patching (pe_patch::patch_server).
#   3. Waiting for any rebooted targets to come back.
#   4. Optional post-reboot script.
#   5. Optional post-patching puppet run.
#
# @param targets
#   The targets to patch.
# @param reboot
#   Passed to the pe_patch::patch_server task as 'reboot'.
# @param yum_params
#   Passed to the pe_patch::patch_server task.
# @param dpkg_params
#   Passed to the pe_patch::patch_server task.
# @param zypper_params
#   Passed to the pe_patch::patch_server task.
# @param patch_task_timeout
#   Passed to the pe_patch::patch_server task as 'timeout'.
# @param health_check_runinterval
#   Passed to the pe_patch::agent_health task as 'target_runinterval'.
# @param reboot_wait_time
#   Maximum time, in seconds, to wait for rebooting targets to come back.
# @param security_only
#   Passed to the pe_patch::patch_server task.
# @param run_health_check
#   Whether to run the pre-patching health check and the pre/post-patching
#   puppet runs. When false, all targets are considered ready to patch.
# @param clean_cache
#   Passed to the pe_patch::patch_server task.
# @param health_check_noop
#   Passed to the pe_patch::agent_health task as 'target_noop_state'.
# @param health_check_use_cached_catalog
#   Passed to the pe_patch::agent_health task as
#   'target_use_cached_catalog_state'.
# @param health_check_service_enabled
#   Passed to the pe_patch::agent_health task as 'target_service_enabled'.
# @param health_check_service_running
#   Passed to the pe_patch::agent_health task as 'target_service_running'.
# @param post_reboot_scriptpath
#   If set, run as a command on every target that finished patching (and
#   rebooting, where applicable). Failures of the script do not fail the plan.
#
# @return [Hash] The input targets, the resolved target list, per-stage lists
#   of failed/patched target names, and a 'counts' hash with their sizes.
plan patchy::patching (
  TargetSpec                                            $targets,
  Optional[Enum['always', 'never', 'patched', 'smart']] $reboot                          = 'patched',
  Optional[String]                                      $yum_params                      = undef,
  Optional[String]                                      $dpkg_params                     = undef,
  Optional[String]                                      $zypper_params                   = undef,
  Optional[Integer]                                     $patch_task_timeout              = 3600,
  Optional[Integer]                                     $health_check_runinterval        = 1800,
  Optional[Integer]                                     $reboot_wait_time                = 600,
  Optional[Boolean]                                     $security_only                   = false,
  Optional[Boolean]                                     $run_health_check                = true,
  Optional[Boolean]                                     $clean_cache                     = false,
  Optional[Boolean]                                     $health_check_noop               = false,
  Optional[Boolean]                                     $health_check_use_cached_catalog = false,
  Optional[Boolean]                                     $health_check_service_enabled    = true,
  Optional[Boolean]                                     $health_check_service_running    = true,
  Optional[Pe_patch::Absolutepath]                      $post_reboot_scriptpath          = undef,
){
  # Get Target List
  $target_array = get_targets($targets)

  unless $target_array.empty {
    ### Health Check, Input: $target_array, Output: $patch_ready ###
    ### Add'l result params: $puppet_not_healthy, $pre_patch_puppet_run_failed ###
    if $run_health_check {
      # Check the health of the puppet agent on all nodes
      # Ensure puppet configuration is as expected, agent hasn't been disabled
      # with puppet agent --disable, puppet ssl verify passes, the puppet
      # service is in the right state, all servers are reachable, and the
      # last puppet run didn't have failures.
      $agent_health = run_task('pe_patch::agent_health', $target_array,
                                target_runinterval              => $health_check_runinterval,
                                target_noop_state               => $health_check_noop,
                                target_use_cached_catalog_state => $health_check_use_cached_catalog,
                                target_service_enabled          => $health_check_service_enabled,
                                target_service_running          => $health_check_service_running,
                                '_catch_errors'                 => true)

      # Pull out list of those that are ok/in error
      $puppet_healthy = $agent_health.ok_set.names
      $puppet_not_healthy = $agent_health.error_set.results.map | $error | { $error.error.details }

      if $puppet_healthy.empty {
        $patch_ready = []
      } else {
        $pre_patch_run_puppet_check = run_task('enterprise_tasks::run_puppet', $puppet_healthy,
                                                max_timeout     => 256,
                                                '_catch_errors' => true)
        $patch_ready = $pre_patch_run_puppet_check.ok_set.names
        $pre_patch_puppet_run_failed = $pre_patch_run_puppet_check.error_set.names
      }
    } else {
      # No health check requested: every resolved target is ready to patch.
      # Use target names so $patch_ready has the same shape as in the
      # health-check branch. (The original referenced $certnames, which is
      # never defined in this plan.)
      $patch_ready = $target_array.map |$t| { $t.name }
    }

    ### Patching, Input: $patch_ready, Output: $post_patch_ready ###
    ### Add'l result params: $not_patched, $reboot_timed_out ###
    if $patch_ready.empty {
      $post_patch_ready = []
    } else {
      # So we can detect when a node has rebooted
      $begin_boot_time_results = without_default_logging() || {
        run_task('pe_patch::last_boot_time', $patch_ready)
      }

      # Actually carry out the patching on all healthy nodes
      $patch_result = run_task('pe_patch::patch_server',
                                $patch_ready,
                                yum_params      => $yum_params,
                                dpkg_params     => $dpkg_params,
                                zypper_params   => $zypper_params,
                                timeout         => $patch_task_timeout,
                                reboot          => $reboot,
                                security_only   => $security_only,
                                clean_cache     => $clean_cache,
                                '_catch_errors' => true)

      # Pull out list of those that are ok/in error
      $patched = $patch_result.ok_set.names
      $not_patched = $patch_result.error_set.names
      $rebooting_result = $patch_result.ok_set.results.filter | $result | { $result.value['was_rebooted'] }
      $rebooting = $rebooting_result.map | $result | { $result.target.name }

      ### Wait for Reboot ###
      if $rebooting.empty {
        $post_patch_ready = $patched
      } else {
        # Adapted from puppetlabs-reboot
        $start_time = Timestamp()
        $wait_results = without_default_logging() || {
          # Poll the pending targets; the loop stops early once nothing is
          # pending or the wait time has elapsed.
          $reboot_wait_time.reduce({'pending' => $rebooting, 'ok' => []}) |$memo, $_| {
            if ($memo['pending'].empty or $memo['timed_out']) {
              break()
            }

            $plural = $memo['pending'].size > 1 ? {
              true    => 's',
              default => '',
            }
            out::message("Waiting for ${$memo['pending'].size} node${plural} to reboot. Note that a failed pe_patch::last_boot_time task is normal while a target is in the middle of rebooting, and may be safely ignored.")
            $current_boot_time_results = run_task('pe_patch::last_boot_time', $memo['pending'], _catch_errors => true)

            $failed_results = $current_boot_time_results.filter |$current_boot_time_res| {
              # If we errored, need to check again, since it's probably still rebooting
              if !$current_boot_time_res.ok {
                true
              } else {
                # If the boot time is the same as it was before we patched,
                # we haven't rebooted yet and need to check again.
                $target_name = $current_boot_time_res.target.name
                $begin_boot_time_res = $begin_boot_time_results.find($target_name)
                $current_boot_time_res.value == $begin_boot_time_res.value
              }
            }

            # Turn array of results into ResultSet so we can extract Targets
            $failed_targets = ResultSet($failed_results).targets.map |$t| { $t.name }
            $ok_targets = $memo['pending'] - $failed_targets

            $elapsed_time_sec = Integer(Timestamp() - $start_time)
            $timed_out = $elapsed_time_sec >= $reboot_wait_time

            if !$failed_targets.empty and !$timed_out {
              # Wait for targets to be available again before rechecking. If we end up failing
              # this wait on any of those nodes, we'll catch it in the next iteration.
              pe_patch::sleep(30)
              $remaining_time = $reboot_wait_time - $elapsed_time_sec
              wait_until_available($failed_targets, wait_time => $remaining_time, retry_interval => 1, '_catch_errors' => true)
            }

            ({
              'pending'   => $failed_targets,
              'ok'        => $memo['ok'] + $ok_targets,
              'timed_out' => $timed_out,
            })
          }
        }

        # Anything still pending when the loop ended never came back in time.
        $reboot_timed_out = $wait_results['pending']
        $post_patch_ready = $patched - $reboot_timed_out
      }
    }

    ### Post reboot script, Input: $post_patch_ready, Output: None ###
    # Run the post_reboot_scriptpath, if defined. Don't fail the plan
    # if the script fails. The user will be able to see the result in
    # the console.
    if $post_reboot_scriptpath {
      run_command($post_reboot_scriptpath, $post_patch_ready, '_catch_errors' => true)
    }

    ### Post patching health check, Input: $post_patch_ready, Output: $post_patch_puppet_run_passed ###
    ### Add'l result params: $post_patch_puppet_run_failed ###
    if $post_patch_ready.empty or !$run_health_check {
      $post_patch_puppet_run_passed = $post_patch_ready
    } else {
      # Sometimes a puppet run immediately after reboot fails, so give it a bit of time.
      pe_patch::sleep(30)
      $post_puppet_check = run_task('enterprise_tasks::run_puppet', $post_patch_ready,
                                    max_timeout     => 256,
                                    '_catch_errors' => true)
      $post_patch_puppet_run_passed = $post_puppet_check.ok_set.names
      $post_patch_puppet_run_failed = $post_puppet_check.error_set.names
    }
  }

  ### Defaults ###
  # Several result variables are only assigned on some paths above, so fall
  # back to an empty list for any that were never set.
  # Note: $targets and $target_array are always defined,
  # so no need to set a default value here.
  $puppet_not_healthy_result = defined('$puppet_not_healthy') ? {
    true    => $puppet_not_healthy,
    default => [],
  }
  $pre_patch_puppet_run_failed_result = defined('$pre_patch_puppet_run_failed') ? {
    true    => $pre_patch_puppet_run_failed,
    default => [],
  }
  $patched_result = defined('$patched') ? {
    true    => $patched,
    default => [],
  }
  $not_patched_result = defined('$not_patched') ? {
    true    => $not_patched,
    default => [],
  }
  $post_patch_puppet_run_failed_result = defined('$post_patch_puppet_run_failed') ? {
    true    => $post_patch_puppet_run_failed,
    default => [],
  }
  $reboot_timed_out_result = defined('$reboot_timed_out') ? {
    true    => $reboot_timed_out,
    default => [],
  }

  # Output the results
  return({
    'targets'                      => $targets,
    'patchable_nodes'              => $target_array,
    'puppet_health_check_failed'   => $puppet_not_healthy_result,
    'pre_patch_puppet_run_failed'  => $pre_patch_puppet_run_failed_result,
    'patching_failed'              => $not_patched_result,
    'post_patch_puppet_run_failed' => $post_patch_puppet_run_failed_result,
    'reboot_timed_out'             => $reboot_timed_out_result,
    'nodes_patched'                => $patched_result,
    'counts'                       => {
      'patchable_nodes_count'        => $target_array.length,
      'puppet_health_check_failed'   => $puppet_not_healthy_result.length,
      'pre_patch_puppet_run_failed'  => $pre_patch_puppet_run_failed_result.length,
      'patching_failed'              => $not_patched_result.length,
      'post_patch_puppet_run_failed' => $post_patch_puppet_run_failed_result.length,
      'reboot_timed_out'             => $reboot_timed_out_result.length,
      'nodes_patched'                => $patched_result.length,
    }
  })
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment