taidos · January 6, 2022 19:57
diff --git a/slowdown-upcp.hook b/slowdown-upcp.hook
 #!/bin/bash

 # Pre-upcp hook which denies upcp while the server is under high load or
 # IO wait, or while chef-client, CDP etc. are running.
 # Story: https://wwwhosting.atlassian.net/browse/SH-222

 # Load-average ceiling: 10 on hardware, 6 on a VPS. A host whose lspci
 # listing is empty (zero lines) is treated as a VPS.
 limit_load=10
 [[ $(/sbin/lspci | wc -l) -ne 0 ]] || limit_load=6
 # Ceiling for the iostat figure read by _check_loadavg
 limit_iowait=40

 # Number of check attempts made by the main loop
 max_wait=6

 # Hook log file
 log=/var/log/upcp/slowdown-upcp-hook.log

 # Append one timestamped line to the hook log.
 # Arguments: $1 - message text
 # Globals:   log (read) - path of the log file
 _log() {
  local stamp
  stamp=$(date '+%m-%d-%Y_%H:%M')
  printf '%s %s\n' "$stamp" "$1" >> "$log"
 }

 # Marker file touched by _upcp_killer the first time an update is denied
 tracker=/usr/local/cpanel/logs/failed.upcp

 # Recipient of the alert mail sent by _upcp_killer
 # NOTE(review): the address looks redacted in this copy - confirm the real one
 alert_email='[email protected]'

 # Space-separated tags of the checks that failed in the current attempt
 failure_points=

 # When this file exists the hook lets upcp run without any check
 force=/etc/force_upcp

 ## Functions ##

 # Flag "CDP" as a failure point when /usr/sbin/hcp is executable and its
 # `hcp -l` listing contains a "Virtual Device:" line.
 # Globals: failure_points (prepended to)
 _check_hcp() {
  local hcp_bin=/usr/sbin/hcp
  # Nothing to check on hosts without the hcp binary
  [ -x "$hcp_bin" ] || return 0
  if "$hcp_bin" -l | grep -q "Virtual Device:"; then
    failure_points="CDP $failure_points"
  fi
 }

 # Flag "LOAD" as a failure point when the 1-minute load average or the CPU
 # %iowait is above its limit.
 # Globals:
 #   limit_load, limit_iowait (read) - integer thresholds
 #   failure_points (prepended to)
 #   loadavg_file (read, optional) - load average source, default /proc/loadavg
 #   iostat_cmd   (read, optional) - iostat command, default /usr/bin/iostat
 _check_loadavg() {
  local load io_wait

  # The first field of loadavg is the 1-minute average; keep its integer part.
  read -r load _ < "${loadavg_file:-/proc/loadavg}"
  load=${load%%.*}
  [[ $load =~ ^[0-9]+$ ]] || load=0

  # "-c 1 2" prints two CPU reports: the first is the average since boot, the
  # second covers the last second, and the awk program keeps the last one.
  # The column is looked up by name in the header row; the value row has no
  # leading "avg-cpu:" label, so its fields sit one position to the left.
  # LC_ALL=C keeps "." as the decimal separator.
  io_wait=$(LC_ALL=C "${iostat_cmd:-/usr/bin/iostat}" -c 1 2 | awk '
    /%iowait/ {
      col = 0
      for (i = 1; i <= NF; i++) {
        if ($i == "%iowait") { col = ($1 == "avg-cpu:") ? i - 1 : i }
      }
      if (col > 0 && (getline) > 0) { val = $col }
    }
    END { print int(val) }
  ')
  # Missing or unparsable iostat output counts as no IO wait
  [[ $io_wait =~ ^[0-9]+$ ]] || io_wait=0

  if (( load > limit_load || io_wait > limit_iowait )); then
    failure_points="LOAD $failure_points"
  fi
 }

 # Flag "CHEF" as a failure point when pgrep finds a chef-client process
 # owned by root.
 # Globals: failure_points (prepended to)
 _check_chef() {
  pgrep -u root chef-client >/dev/null 2>&1 || return 0
  failure_points="CHEF $failure_points"
 }

 # Hold the hook until the peak window is over.
 # When the current hour is 06..11 (inclusive) the stall is logged through
 # _log and the function sleeps until 12:00; otherwise it returns at once.
 # Nothing is written to stdout.
 _peak_hours() {
  local hour min sleep_for

  # Take hour and minute from one date call so the two values cannot
  # straddle a minute/hour rollover.
  [[ $(date +'%H %M') =~ ^([0-9]{2})\ ([0-9]{2})$ ]] || return 0
  # Force base 10: "08" and "09" are not valid octal numbers.
  hour=$(( 10#${BASH_REMATCH[1]} ))
  min=$(( 10#${BASH_REMATCH[2]} ))

  if (( hour >= 6 && hour <= 11 )); then
    sleep_for=$(( (11 - hour) * 3600 + (60 - min) * 60 ))
    _log "Stalling upcp for $sleep_for seconds due to: Peak Hours detected."
    sleep "$sleep_for"
  fi
 }

 # Deny the pending upcp run, unless denials have gone on for too long.
 #   - No tracker file yet: create it.
 #   - Tracker matched by `find -mtime +3`: mail an alert with the last 20 log
 #     lines, remove the tracker and exit 0 so upcp runs anyway.
 #   - Otherwise: send SIGTERM to root's "cPanel Update" processes and log the
 #     denial.
 # Globals: tracker, log, alert_email (read)
 # Exits:   ends the whole script with status 0 when the run is forced
 _upcp_killer() {
  local mail_subject log_tail pid

  if [ ! -f "$tracker" ]; then
    touch "$tracker"
  # find exits 0 whether or not the file matches, so test what it prints.
  elif [ -n "$(find "$tracker" -mtime +3 -print 2>/dev/null)" ]; then
    mail_subject="Forcing upcp run on server $(hostname)"
    # Capture the tail before logging so the mail shows what led up to this
    log_tail=$(tail -20 "$log")
    _log "Forcing upcp run"
    # printf keeps backslashes in the log text literal (echo -e would not)
    printf 'Forcing upcp. \n\n Last 20 lines of %s: \n\n %s\n' "$log" "$log_tail" \
      | mail -s "$mail_subject" "$alert_email"
    # Start a fresh denial window; a stale tracker would force every later run
    rm -f -- "$tracker"
    exit 0
  fi

  pgrep -u root "cPanel Update" | while read -r pid; do
    kill -15 "$pid"
  done

  _log "Update denied"
 }

 ## Start ##

 _log "Hook triggered"

 # An existing force file lets upcp through without running any check.
 if [ -f "$force" ]; then
  _log "Forcing upcp run"
  exit 0
 fi

 for ((try = 1; try <= max_wait; try++)); do
  failure_points=""
  # _peak_hours should be always called first before any other failure points
  # as calling it after might lead to false positives
  _peak_hours
  _check_hcp
  _check_loadavg
  _check_chef

  if [ -z "$failure_points" ]; then
    # A clean pass ends the current run of denials, so drop the tracker;
    # otherwise an old tracker would make a much later denial force upcp.
    rm -f -- "$tracker"
    _log "Update allowed to run"
    exit 0
  fi

  if (( try == max_wait )); then
    # Nothing is re-checked after the last attempt, so do not sleep again.
    _log "Giving up after $max_wait checks due to: $failure_points"
    break
  fi

  # Sleep 30 mins after the first try, 60 after the next, 90, 120 ..
  wait_for=$(( 1800 * try ))
  _log "Stalling upcp for $wait_for seconds due to: $failure_points"
  sleep "$wait_for"
 done

 # Every attempt failed: deny the update and report it with exit status 1.
 _upcp_killer && exit 1
	#!/bin/bash

	# Pre-upcp hook which denies upcp while the server is under high load or
	# IO wait, or while chef-client, CDP etc. are running.
	# Story: https://wwwhosting.atlassian.net/browse/SH-222

	# Load-average ceiling: 10 on hardware, 6 on a VPS. A host whose lspci
	# listing is empty (zero lines) is treated as a VPS.
	# The pipe must be a real "|": an escaped "\|" is handed to lspci as an
	# argument and the line count is never taken.
	limit_load=10
	[[ $(/sbin/lspci | wc -l) -ne 0 ]] || limit_load=6
	# Ceiling for the iostat figure read by _check_loadavg
	limit_iowait=40

	# Number of check attempts made by the main loop
	max_wait=6

	# Hook log file
	log=/var/log/upcp/slowdown-upcp-hook.log

	# Append one timestamped line to the hook log.
	# Arguments: $1 - message text
	# Globals:   log (read) - path of the log file
	_log() {
	  local stamp
	  stamp=$(date '+%m-%d-%Y_%H:%M')
	  printf '%s %s\n' "$stamp" "$1" >> "$log"
	}

	# Marker file touched by _upcp_killer the first time an update is denied
	tracker=/usr/local/cpanel/logs/failed.upcp

	# Recipient of the alert mail sent by _upcp_killer
	# NOTE(review): the address looks redacted in this copy - confirm the real one
	alert_email='[email protected]'

	# Space-separated tags of the checks that failed in the current attempt
	failure_points=

	# When this file exists the hook lets upcp run without any check
	force=/etc/force_upcp

	## Functions ##

	# Flag "CDP" as a failure point when /usr/sbin/hcp is executable and its
	# `hcp -l` listing contains a "Virtual Device:" line.
	# Globals: failure_points (prepended to)
	_check_hcp() {
	  local hcp_bin=/usr/sbin/hcp
	  # Nothing to check on hosts without the hcp binary
	  [ -x "$hcp_bin" ] || return 0
	  # Real pipe: with an escaped "\|" grep never runs and the words are
	  # passed to hcp as arguments instead.
	  if "$hcp_bin" -l | grep -q "Virtual Device:"; then
	    failure_points="CDP $failure_points"
	  fi
	}

	# Flag "LOAD" as a failure point when the 1-minute load average or the CPU
	# %iowait is above its limit.
	# Globals:
	#   limit_load, limit_iowait (read) - integer thresholds
	#   failure_points (prepended to)
	#   loadavg_file (read, optional) - load average source, default /proc/loadavg
	#   iostat_cmd   (read, optional) - iostat command, default /usr/bin/iostat
	_check_loadavg() {
	  local load io_wait

	  # The first field of loadavg is the 1-minute average; keep its integer part.
	  read -r load _ < "${loadavg_file:-/proc/loadavg}"
	  load=${load%%.*}
	  [[ $load =~ ^[0-9]+$ ]] || load=0

	  # "-c 1 2" prints two CPU reports: the first is the average since boot, the
	  # second covers the last second, and the awk program keeps the last one.
	  # The column is looked up by name in the header row; the value row has no
	  # leading "avg-cpu:" label, so its fields sit one position to the left.
	  # LC_ALL=C keeps "." as the decimal separator.
	  io_wait=$(LC_ALL=C "${iostat_cmd:-/usr/bin/iostat}" -c 1 2 | awk '
	    /%iowait/ {
	      col = 0
	      for (i = 1; i <= NF; i++) {
	        if ($i == "%iowait") { col = ($1 == "avg-cpu:") ? i - 1 : i }
	      }
	      if (col > 0 && (getline) > 0) { val = $col }
	    }
	    END { print int(val) }
	  ')
	  # Missing or unparsable iostat output counts as no IO wait
	  [[ $io_wait =~ ^[0-9]+$ ]] || io_wait=0

	  if (( load > limit_load || io_wait > limit_iowait )); then
	    failure_points="LOAD $failure_points"
	  fi
	}

	# Flag "CHEF" as a failure point when pgrep finds a chef-client process
	# owned by root.
	# Globals: failure_points (prepended to)
	_check_chef() {
	  pgrep -u root chef-client >/dev/null 2>&1 || return 0
	  failure_points="CHEF $failure_points"
	}

	# Hold the hook until the peak window is over.
	# When the current hour is 06..11 (inclusive) the stall is logged through
	# _log and the function sleeps until 12:00; otherwise it returns at once.
	# Nothing is written to stdout.
	_peak_hours() {
	  local hour min sleep_for

	  # Take hour and minute from one date call so the two values cannot
	  # straddle a minute/hour rollover.
	  [[ $(date +'%H %M') =~ ^([0-9]{2})\ ([0-9]{2})$ ]] || return 0
	  # Force base 10: "08" and "09" are not valid octal numbers.
	  hour=$(( 10#${BASH_REMATCH[1]} ))
	  min=$(( 10#${BASH_REMATCH[2]} ))

	  if (( hour >= 6 && hour <= 11 )); then
	    sleep_for=$(( (11 - hour) * 3600 + (60 - min) * 60 ))
	    _log "Stalling upcp for $sleep_for seconds due to: Peak Hours detected."
	    sleep "$sleep_for"
	  fi
	}

	# Deny the pending upcp run, unless denials have gone on for too long.
	#   - No tracker file yet: create it.
	#   - Tracker matched by `find -mtime +3`: mail an alert with the last 20 log
	#     lines, remove the tracker and exit 0 so upcp runs anyway.
	#   - Otherwise: send SIGTERM to root's "cPanel Update" processes and log the
	#     denial.
	# Globals: tracker, log, alert_email (read)
	# Exits:   ends the whole script with status 0 when the run is forced
	_upcp_killer() {
	  local mail_subject log_tail pid

	  if [ ! -f "$tracker" ]; then
	    touch "$tracker"
	  # find exits 0 whether or not the file matches, so test what it prints.
	  elif [ -n "$(find "$tracker" -mtime +3 -print 2>/dev/null)" ]; then
	    mail_subject="Forcing upcp run on server $(hostname)"
	    # Capture the tail before logging so the mail shows what led up to this
	    log_tail=$(tail -20 "$log")
	    _log "Forcing upcp run"
	    # printf keeps backslashes in the log text literal (echo -e would not)
	    printf 'Forcing upcp. \n\n Last 20 lines of %s: \n\n %s\n' "$log" "$log_tail" \
	      | mail -s "$mail_subject" "$alert_email"
	    # Start a fresh denial window; a stale tracker would force every later run
	    rm -f -- "$tracker"
	    exit 0
	  fi

	  pgrep -u root "cPanel Update" | while read -r pid; do
	    kill -15 "$pid"
	  done

	  _log "Update denied"
	}

	## Start ##

	_log "Hook triggered"

	# An existing force file lets upcp through without running any check.
	if [ -f "$force" ]; then
	  _log "Forcing upcp run"
	  exit 0
	fi

	for ((try = 1; try <= max_wait; try++)); do
	  failure_points=""
	  # _peak_hours should be always called first before any other failure points
	  # as calling it after might lead to false positives
	  _peak_hours
	  _check_hcp
	  _check_loadavg
	  _check_chef

	  if [ -z "$failure_points" ]; then
	    # A clean pass ends the current run of denials, so drop the tracker;
	    # otherwise an old tracker would make a much later denial force upcp.
	    rm -f -- "$tracker"
	    _log "Update allowed to run"
	    exit 0
	  fi

	  if (( try == max_wait )); then
	    # Nothing is re-checked after the last attempt, so do not sleep again.
	    _log "Giving up after $max_wait checks due to: $failure_points"
	    break
	  fi

	  # Sleep 30 mins after the first try, 60 after the next, 90, 120 ..
	  wait_for=$(( 1800 * try ))
	  _log "Stalling upcp for $wait_for seconds due to: $failure_points"
	  sleep "$wait_for"
	done

	# Every attempt failed: deny the update and report it with exit status 1.
	_upcp_killer && exit 1