-
-
Save taidos/4087b4284247f197645295cf092609eb to your computer and use it in GitHub Desktop.
Site5 cPanel Update "Slowdown" Hook
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Pre-upcp hook that denies (or delays) upcp while the server is already busy:
# high load / IO wait, a running chef-client, CDP activity, etc.
# Story: https://wwwhosting.atlassian.net/browse/SH-222

# Load-average limit for VPS/hardware.
# Defaults to 6 when lspci lists no devices (treated as a VPS) and to 10
# otherwise (treated as hardware). A missing lspci also produces zero lines,
# so its error output is discarded and the VPS limit applies.
if [[ $(/sbin/lspci 2>/dev/null | wc -l) -eq 0 ]]; then
  limit_load=6
else
  limit_load=10
fi

# IO-wait limit, compared against iostat's %iowait figure.
limit_iowait=40

# Maximum number of check attempts before the update is denied.
max_wait=6

# Hook log file.
log="/var/log/upcp/slowdown-upcp-hook.log"
# Append a timestamped message to the hook log.
# Globals:   log (read) - path of the log file
# Arguments: $1 - message text
_log() {
  # printf keeps the message literal; quoting "$log" keeps the redirect valid
  # even when the path contains spaces.
  printf '%s %s\n' "$(date '+%m-%d-%Y_%H:%M')" "$1" >> "$log"
}
# Failed update tracker: created when an update is first denied; its
# modification time decides when a denied update gets forced through.
tracker="/usr/local/cpanel/logs/failed.upcp"

# Alert e-mail
# NOTE(review): this value looks like an e-mail-obfuscation placeholder rather
# than a deliverable address - confirm the real recipient.
alert_email="[email protected]"

# Failure points: space-separated tags (CDP, LOAD, CHEF) filled in by the checks.
failure_points=""

# Force run: when this file exists the hook lets upcp run without any checks.
force="/etc/force_upcp"
## Functions ##
# Check if hcp is doing its thing: when /usr/sbin/hcp is executable and its
# "-l" listing contains "Virtual Device:", prepend "CDP" to failure_points.
# Globals: failure_points (written)
_check_hcp() {
  local hcp_bin=/usr/sbin/hcp

  # Nothing to check on servers without the tool.
  [[ -x "$hcp_bin" ]] || return 0

  if "$hcp_bin" -l | grep -q "Virtual Device:"; then
    failure_points="CDP $failure_points"
  fi
}
# See if the load average or the CPU IO wait is above its limit; if so,
# prepend "LOAD" to failure_points.
# Globals: limit_load, limit_iowait (read); failure_points (written)
_check_loadavg() {
  local load io_wait

  # Integer part of the first field of /proc/loadavg.
  load=$(cut -f1 -d. /proc/loadavg)

  # Find the %iowait column by name in the header row, then read the same
  # column from the value row below it. The header row starts with an
  # "avg-cpu:" label that the value row lacks, so the index shifts by one;
  # reading a fixed field number from both rows picks up the wrong column.
  # NOTE: per the iostat manual, the first report covers the time since boot,
  # so this figure is a long-run average rather than a current reading.
  io_wait=$(LC_ALL=C /usr/bin/iostat | awk '
    /%iowait/ {
      for (i = 1; i <= NF; i++) if ($i == "%iowait") col = i
      if ($1 ~ /:$/) col--
      if ((getline) > 0 && col > 0) {
        sub(/\..*/, "", $col)   # keep the integer part only
        print $col
      }
      exit
    }')

  # A value that could not be read counts as 0 instead of breaking the test.
  if [[ ${load:-0} -gt $limit_load || ${io_wait:-0} -gt $limit_iowait ]]; then
    failure_points="LOAD $failure_points"
  fi
}
# Check if chef is running: when pgrep finds a chef-client process owned by
# root, prepend "CHEF" to failure_points.
# Globals: failure_points (written)
_check_chef() {
  if pgrep -u root chef-client >/dev/null 2>&1; then
    failure_points="CHEF $failure_points"
  fi
}
# Check if it's peak hour (06:00-11:59). If so, log the delay and sleep until
# 12:00; otherwise return immediately. Writes nothing to stdout.
# Globals: log (read)
_peak_hours() {
  local now hour minute sleep_for

  # A single date call, so hour and minute describe the same instant.
  now=$(date '+%H %M')
  [[ $now =~ ^([0-9]{2})\ ([0-9]{2})$ ]] || return 0

  # Force base 10: zero-padded values such as "08" are not valid octal.
  hour=$(( 10#${BASH_REMATCH[1]} ))
  minute=$(( 10#${BASH_REMATCH[2]} ))

  if (( hour >= 6 && hour <= 11 )); then
    # Whole hours left before 11:00 plus the minutes left in the current hour.
    sleep_for=$(( (11 - hour) * 3600 + (60 - minute) * 60 ))
    printf '%s Stalling upcp for %s seconds due to: Peak Hours detected.\n' \
      "$(date '+%m-%d-%Y_%H:%M')" "$sleep_for" >> "$log"
    sleep "$sleep_for"
  fi
}
# Upcp killer: deny the pending update, or force it once denials have gone on
# for too long.
#   - No tracker file yet: create it, then deny.
#   - Tracker matches find's "-mtime +3": log, e-mail an alert with the last
#     20 log lines, remove the tracker and exit 0 so upcp is allowed to run.
#   - Otherwise: deny.
# Denying sends SIGTERM to root's "cPanel Update" processes and logs it.
# Globals: tracker, log, alert_email (read)
# Exits:   0 (terminates the script) when the update is forced
_upcp_killer() {
  local mail_subject mail_body pid

  if [[ ! -f "$tracker" ]]; then
    touch "$tracker"
  # find exits 0 whether or not the file matches -mtime, so the decision has
  # to come from its output, not from its exit status.
  elif [[ -n $(find "$tracker" -mtime +3 -print 2>/dev/null) ]]; then
    mail_subject="Forcing upcp run on server $(hostname)"
    mail_body=$(printf 'Forcing upcp. \n\n Last 20 lines of %s: \n\n %s' \
      "$log" "$(tail -20 "$log")")
    _log "Forcing upcp run"
    # printf '%s' keeps backslashes in the log excerpt literal.
    printf '%s\n' "$mail_body" | mail -s "$mail_subject" "$alert_email"
    # Start a fresh tracking window; a stale tracker would force every
    # subsequent denial straight away.
    rm -f -- "$tracker"
    exit 0
  fi

  pgrep -u root "cPanel Update" | while read -r pid; do
    kill -15 "$pid"
  done
  _log "Update denied"
}
## Start ##
# Flow: log the trigger; let upcp run at once if the force file exists;
# otherwise run the checks up to max_wait times, backing off between attempts.
# Exit 0 lets the update run; if every attempt fails, _upcp_killer denies it
# and the hook exits 1.
_log "Hook triggered"

if [[ -f "$force" ]]; then
  _log "Forcing upcp run"
  exit 0
fi

for (( try = 1; try <= max_wait; try++ )); do
  failure_points=""
  # _peak_hours should always be called before any other check: it may sleep,
  # and results gathered before that sleep could be false positives.
  _peak_hours
  _check_hcp
  _check_loadavg
  _check_chef

  if [[ -z "$failure_points" ]]; then
    # The denial streak is over; drop the tracker so an old denial cannot
    # trigger a premature forced run later.
    rm -f -- "$tracker"
    _log "Update allowed to run"
    exit 0
  fi

  # No re-check follows the last attempt, so waiting after it would only
  # delay the denial.
  if (( try == max_wait )); then
    _log "Giving up after $max_wait attempts due to: $failure_points"
    break
  fi

  # Sleep 30 mins after the first try, 60 after the next, 90, 120 ..
  wait_for=$(( 1800 * try ))
  _log "Stalling upcp for $wait_for seconds due to: $failure_points"
  sleep "$wait_for"
done

_upcp_killer && exit 1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment