Skip to content

Instantly share code, notes, and snippets.

View pboothe's full-sized avatar
Always busy, always online, always ready for a cup of coffee

Peter Boothe pboothe

Always busy, always online, always ready for a cup of coffee
View GitHub Profile
did_they_come_back() {
echo "#### Starting did_they_come_back() ####"
#TODO: Make this take out bad entries from $REBOOT_CANDIDATES
for line in `cat "${REBOOT_ATTEMPTED}"`; do
attempted_host=`echo $line | awk -F: '{ print $1 }'`
if grep -q "${attempted_host}" "${REBOOT_CANDIDATES}" ; then
echo "${attempted_host} still not up but requested during last run:" \
$line |tee -a ${PROBLEMATIC} "${NOTIFICATION_EMAIL}" > /dev/null
else
did_they_come_back() {
echo "#### Starting did_they_come_back() ####"
#TODO: Make this take out bad entries from $REBOOT_CANDIDATES
for line in `cat "${REBOOT_ATTEMPTED}"`; do
attempted_host=`echo $line | awk -F: '{ print $1 }'`
if grep -q "${attempted_host}" "${REBOOT_CANDIDATES}" ; then
echo "${attempted_host} still not up but requested during last run:" \
$line |tee -a ${PROBLEMATIC} "${NOTIFICATION_EMAIL}" > /dev/null
else
has_it_been_24_hrs() {
echo "#### Starting has_it_been_24_hrs ####"
echo "Contents of REBOOT_CANDIDATES (/tmp/rebot-testing/ssh_outage/reboot_candidates:"
cat ${REBOOT_CANDIDATES}
echo "Contents of REBOOT_LOG (/tmp/rebot-testing/reboot_history/reboot_log:"
cat ${REBOOT_LOG}
#echo "# Starting timestamp comparison #"
echo ""
rm -f "${REBOOT_CANDIDATES}.tmp" && touch "${REBOOT_CANDIDATES}.tmp"
has_it_been_24_hrs() {
echo "#### Starting has_it_been_24_hrs ####"
echo "Contents of REBOOT_CANDIDATES (/tmp/rebot-testing/ssh_outage/reboot_candidates:"
cat ${REBOOT_CANDIDATES}
echo "Contents of REBOOT_LOG (/tmp/rebot-testing/reboot_history/reboot_log:"
cat ${REBOOT_LOG}
#echo "# Starting timestamp comparison #"
echo ""
rm -f "${REBOOT_CANDIDATES}.tmp" && touch "${REBOOT_CANDIDATES}.tmp"
#!/bin/bash
#
# Reboot crashed hosts. A host is rebooted if:
# 1) baseList.pl shows ssh (later, host) and sshalt alert as down for the host
# 2) the switch for the site is up
# 3) the host isn't a straggler from the last run
# 4) the host hasn't been rebooted in the last 24 hours
# 5) there are no more than 5 hosts in the current reboot queue
# Any machines that came back from the previous run are recorded.
#
#!/bin/bash
# Stress-test setup: checks that the stress_test_results directory exists in
# the current working directory, then creates its "ws" subdirectory.

# NOTE(review): XXXXX looks like a placeholder value -- confirm and replace
# before running.
SERVER=XXXXX

# The results directory must already exist. This is an error condition, so
# the message goes to stderr and the script stops with a non-zero status.
if [[ ! -d stress_test_results ]]; then
  echo "You need to create a directory called stress_test_results" >&2
  exit 1
fi

# -p makes reruns quiet when the ws subdirectory is already present.
mkdir -p stress_test_results/ws