Created
July 26, 2016 13:56
-
-
Save gasgasalterego/0bb06963f069417058d999994c794a61 to your computer and use it in GitHub Desktop.
KeepAlived configuration for automatic switch of virtual IP between two MySQL instances configured in Master/Slave and with circular replica.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################################################################################################################################## | |
# This configuration needs 2 fails in order to change from current state to FAULT state and needs 2 success to change from BACKUP state to | |
# MASTER state. | |
# | |
# !!!MOST IMPORTANT!!! | |
# BEFORE keepalived can be started this file must be properly edited (see HOW TO section below) | |
# | |
# HOW TO | |
# 1 - Check that all the parameters of the script(s) in the "vrrp_script mysql" section below are correct | |
# 2 - unicast_src_ip (IP of the local machine) must be manually edited | |
# 3 - unicast_peer (IP of the other machine which ensures HA) must be manually edited | |
# NOTE: remember that, if the virtual IP is changed, in addition to updating it throughout the whole configuration file, it is also | |
# good practice to change router_id, vrrp_instance and virtual_router_id accordingly so that the id matches the last octet of the VIP. | |
# | |
# START KEEPALIVED | |
# 1 - check mysql and keepalived service status, the output should be like these: | |
# [root@test-ha01 ~]# chkconfig --list|grep 'keepalived\|mysql' | |
# keepalived 0:off 1:off 2:off 3:off 4:off 5:off 6:off | |
# mysql 0:off 1:off 2:on 3:on 4:on 5:on 6:off | |
# 2 - change service configuration in order to make keepalived start at machine start and mysql not | |
# [root@test-ha01 ~]# chkconfig --level 2345 keepalived on | |
# [root@test-ha01 ~]# chkconfig --level 2345 mysql off | |
# 3 - check service status, the output should be like this | |
# [root@test-ha01 ~]# chkconfig --list|grep 'keepalived\|mysql' | |
# keepalived 0:off 1:off 2:on 3:on 4:on 5:on 6:off | |
# mysql 0:off 1:off 2:off 3:off 4:off 5:off 6:off | |
# 4 - start keepalived service | |
# | |
############################################################################################################################################## | |
# Global keepalived settings: notification recipient list and this node's router id.
global_defs {
    notification_email {
        # NOTE(review): the original recipient was lost to e-mail obfuscation in the
        # published copy of this file; replace this placeholder with the real address.
        admin@domain.com
    }
    # smtp_server is left commented out here.
    # NOTE(review): confirm whether e-mail notification is meant to be active.
    #smtp_server smt1.domain.com
    #smtp_connect_timeout 30
    ## The $ID is a unique number, the logic is that this value is the last number in the VIP.
    router_id 1
    debug
}
# Health check run periodically by keepalived; instances that list "mysql" in
# their track_script section follow its result.
vrrp_script mysql {
    # script "/usr/local/vbin/keepalived/keepalived_mysql_check.sh $VIP $INTERFACE $INTERFACE_ALIAS"
    script "/usr/local/vbin/keepalived/keepalived_mysql_check.sh 10.10.10.1 eth0 pg"
    interval 30   # checks every 30 seconds
    fall 2        # requires 2 failures for KO
    rise 2        # requires 2 successes for OK switch
}
## The $ID is a unique number, the logic is that this value is the last number in the VIP.
vrrp_instance dbvip_1 {
    virtual_router_id 1
    ## MASTER : the master instance
    ## BACKUP : the fail-over instance
    ## "nopreempt" (below) only takes effect when the initial state is BACKUP, so
    ## BACKUP is used here; "BACKUP|MASTER" was a placeholder, not a valid value.
    ## NOTE(review): with both nodes starting as BACKUP the first election is
    ## presumably decided by "priority" - confirm the priorities on each node.
    state BACKUP
    # interface $INTERFACE
    interface eth0
    priority 100
    advert_int 1
    nopreempt
    # unicast_src_ip xx.xx.xx.xx #IP Address of local machine. NOTE: this is mandatory if multicast is forbidden in your network
    unicast_src_ip 10.10.10.2
    unicast_peer {
        # xx.xx.xx.xx #IP Address of other machine(s). NOTE1: this is mandatory if multicast is forbidden in your network. NOTE2: multiple values can be issued here
        10.10.10.3
    }
    authentication {
        auth_type AH
        auth_pass Password
    }
    # The instance follows the result of the "mysql" vrrp_script.
    track_script {
        mysql
    }
    virtual_ipaddress {
        # $VIP dev $INTERFACE label $INTERFACE:$INTERFACE_ALIAS
        10.10.10.1 dev eth0 label eth0:pg
    }
    # notify_master "/usr/local/vbin/keepalived/keepalived_notif.sh MASTER $VIP"
    notify_master "/usr/local/vbin/keepalived/keepalived_notif.sh MASTER 10.10.10.1"
    # notify_backup "/usr/local/vbin/keepalived/keepalived_notif.sh BACKUP $VIP"
    notify_backup "/usr/local/vbin/keepalived/keepalived_notif.sh BACKUP 10.10.10.1"
    # notify_fault "/usr/local/vbin/keepalived/keepalived_notif.sh FAULT $VIP"
    notify_fault "/usr/local/vbin/keepalived/keepalived_notif.sh FAULT 10.10.10.1"
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# monitor mysql status
#
# Positional arguments:
#   $1 - virtual IP handled by keepalived
#   $2 - network interface carrying the virtual IP
#   $3 - alias (label suffix) of the virtual interface
#   $4 - optional report host
virtual_ip=$1
iface=$2
name=$3
report_host=$4

# MySQL Paths and commands
# NOTE(review): credentials are hard-coded and passed on the command line,
# where other local users can see them in the process list.
MYSQL_USER="u_system"
MYSQL_PASS="metsys_u"
# NOTE(review): $instance is never assigned in this script, so the path below
# contains an empty instance name; DATA_DIR is not referenced elsewhere here.
DATA_DIR="/usr/local/mysql/mysql-$instance/data"
# These hold a command plus its options and are expanded unquoted on purpose.
MYSQL="/usr/bin/mysql -s -u${MYSQL_USER} -p${MYSQL_PASS}"
MYSQLADMIN="/usr/bin/mysqladmin -u${MYSQL_USER} -p${MYSQL_PASS}"
R_MYSQL="/usr/bin/mysql -s"
R_MYSQLADMIN="/usr/bin/mysqladmin"

# Other vars
r_user="DB_USER"
r_psw="DB_PASS"
proc_count=$(ps ax | grep -w "$$" | grep -v grep | wc -l)
viface=$iface:$name
# Only the first 16 characters of the label are used.
virt_iface=${viface:0:16}
# Accept both "inet addr:A.B.C.D" and "inet A.B.C.D" ifconfig output formats.
possessed_ip=$(/sbin/ifconfig "$iface" | awk '/inet / { sub(/^addr:/, "", $2); print $2; exit }')
possessed_vip=$(/sbin/ifconfig "$virt_iface" | awk '/inet / { sub(/^addr:/, "", $2); print $2; exit }')
gateway=$(/sbin/ip route | awk '/default/ { print $3 }')
# Field 9 of the interface's route line; the interface name is handed to awk
# with -v because shell variables are not expanded inside single quotes.
service_ip=$(/sbin/ip route | awk -v dev="$iface" '$0 ~ dev { print $9 }')
netmask=$(/sbin/ifconfig "$iface" | sed -rn '2s/ .*:(.*)$/\1/p')
pidfile=/var/log/virtual_ip_take_$virtual_ip.pid
logfile=/var/log/virtual_ip_take_$virtual_ip.log
## FUNCTIONS ##################################################################
# Append one line to $logfile, prefixed with this script's PID and a timestamp.
# Arguments: the message words (joined with single spaces)
# Globals:   logfile (read)
function log(){
  printf '#[%s]# %s %s\n' "$$" "$(date '+%F %T')" "$*" >> "$logfile"
}
## REPORT SECTION
# Reporting is switched off here: this overrides whatever was passed as the
# fourth positional argument, so every later test against "NoReport" skips the
# report-host code paths.
report_host="NoReport"
## END REPORT SECTION
# Forward a state name to the keepalived notification script.
# Arguments: $1 - state name (callers in this file pass FAULT)
# Globals:   virtual_ip (read)
# Returns:   the exit status of the notification script
function notify(){
  local state=$1
  /usr/local/vbin/keepalived/keepalived_notif.sh "$state" "$virtual_ip"
}
# Verify that the default gateway answers to ping.
# Globals: gateway, logfile (read)
# Returns: 0 when the gateway replied. On any failure the pidfile is removed,
#          FAULT is notified and the script exits with status 1.
function ping_gw(){
  # Any non-zero ping status is a failure (ping reports errors such as an
  # unknown host with a status other than 1), and so is an empty gateway.
  if [ -z "$gateway" ] || ! /bin/ping -q -c 3 -w 10 "$gateway" >> "$logfile" 2>&1; then
    log "Unreachable GW, IP cannot be taken ... ABORTING"
    pidfile_del
    notify FAULT
    exit 1
  fi
  log "GW correctly pinged"
  return 0
}
# Ping the virtual IP up to N times and abort as soon as it answers.
# Arguments: $1 - number of attempts (defaults to 1)
# Globals:   virtual_ip, logfile (read)
# Returns:   0 when the VIP never answered. When it answers, the pidfile is
#            removed, FAULT is notified and the script exits with status 1.
function ping_vip(){
  local tries=${1:-1}
  local i
  # Brace expansion ({1..$n}) does not accept variables, so count explicitly.
  for (( i = 1; i <= tries; i++ )); do
    if /bin/ping -q -c 1 -w 5 "$virtual_ip" >> "$logfile" 2>&1; then
      log "Ping $virtual_ip Success! ... ABORTING"
      ##### THIS CHECKS ARE ALREADY PERFORMED BY NAGIOS
      # check_mysql_alive
      # check_slave_status
      ####
      pidfile_del
      log "### END #####"
      log "### END LOG #####"
      notify FAULT
      exit 1
    fi
    log "Ping $virtual_ip: unreachable at try $i ... Go On!"
    stop_replica
  done
  return 0
}
# Write a PID into $pidfile.
# Arguments: $1 - PID to register (defaults to this script's PID)
# Globals:   pidfile (read)
# Returns:   0 on success. When the file cannot be written, FAULT is notified
#            and the script exits with status 1.
function pidfile_create(){
  local pid=${1:-$$}
  # Creating and filling the file in one step means a failed write is caught
  # too, and the PID logged is the PID actually stored.
  if printf '%s\n' "$pid" > "$pidfile"; then
    log "pidfile created"
    log "pid $pid registered"
    return 0
  fi
  log "ERROR ... pidfile cannot be created"
  log "### END #####"
  log "### END LOG #####"
  notify FAULT
  exit 1
}
# Remove $pidfile when it exists and log whether the removal worked.
# Globals: pidfile (read)
# Returns: always 0
function pidfile_del(){
  # Nothing to do (and nothing to log) when there is no pidfile.
  [ -e "$pidfile" ] || return 0
  rm -f "$pidfile"
  if [ ! -e "$pidfile" ]; then
    log "pidfile correctly removed"
  else
    log "WARNING ... pidfile cannot be removed"
  fi
  return 0
}
# Make sure no previous run of this script is still active, then register
# the current run in the pidfile.
# Globals: pidfile (read)
# Returns: 0 once the pidfile holds the current PID. When the PID stored in an
#          existing pidfile is still alive, FAULT is notified and the script
#          exits with status 1.
function check_pid_file(){
  local pid
  if [ -e "$pidfile" ]; then
    pid=$(head -n 1 "$pidfile")
    log "Found previous pidfile with pid $pid ... checking processes"
    # kill -0 probes the process without signalling it; an empty or
    # non-numeric pidfile is treated as stale.
    # NOTE(review): kill -0 also fails for processes owned by another user;
    # this assumes the script runs with enough privileges - confirm.
    if [[ "$pid" =~ ^[0-9]+$ ]] && kill -0 "$pid" 2>/dev/null; then
      log "Process $pid is still running ... ABORTING"
      log "##### END #####"
      log "##### END LOG #####"
      notify FAULT
      exit 1
    fi
    log "### Old pidfile not removed ... replacing it"
    pidfile_del
    pidfile_create $$
  else
    pidfile_create $$
  fi
  return 0
}
# Start MySQL through its init script, trying at most twice.
# Returns: 0 as soon as one start succeeds. After the second failed attempt
#          the pidfile is removed, FAULT is notified and the script exits 1.
function instance_start(){
  local attempt
  for attempt in 1 2; do
    if /etc/init.d/mysql start; then
      log "instance UP ... OK!"
      return 0
    fi
    log "RESTART FAILED .. ABORTING"
    # Only the second failure is fatal; the first one just triggers a retry.
    [ "$attempt" == 2 ] || continue
    log "##### END #####"
    pidfile_del
    log "##### END LOG #####"
    notify FAULT
    exit 1
  done
}
# Start the instance on the node that owns the VIP.
# Returns: 0 when instance_start succeeded, 1 otherwise.
function master_instance_start(){
  if instance_start; then
    return 0
  fi
  return 1
}
# Record the replication coordinates read so far, then stop the slave threads.
# Globals: MYSQL, report_host (read); MASTER_LOG_FILE, MASTER_LOG_POS (written)
function stop_replica(){
  local slave_status
  # One query, so file and position come from the same snapshot.
  slave_status=$($MYSQL -e 'show slave status\G')
  # Anchoring on leading blanks keeps Relay_Master_Log_File from matching.
  MASTER_LOG_FILE=$(sed -n -r 's/^[[:space:]]*Master_Log_File: ([^[:space:]]+)/\1/p' <<< "$slave_status")
  MASTER_LOG_POS=$(sed -n -r 's/.*Read_Master_Log_Pos: ([0-9]+)/\1/p' <<< "$slave_status")
  $MYSQL -e 'stop slave;'
  # Double quotes are required here: inside single quotes the variable is
  # never expanded and the comparison is always true.
  if [ "$report_host" != 'NoReport' ]; then
    # NOTE(review): stop_report_replica is not defined in this file.
    stop_report_replica
  fi
}
# Restart replication up to the given master coordinates and verify alignment.
# Arguments: $1 - master log file name
#            $2 - master log position
# Globals:   MYSQL (read)
# Returns:   0 after check_slave_delay returned. When the slave cannot be
#            started, the pidfile is removed, FAULT is notified and the script
#            exits with status 1.
function start_slave(){
  local until_file=$1
  local until_pos=$2
  $MYSQL -e "stop slave"
  if $MYSQL -e "start slave until master_log_file='$until_file',master_log_pos=$until_pos"; then
    log "SLAVE RUNNING ... OK!"
  else
    log "SLAVE START FAILED ... ABORTING!"
    pidfile_del
    log "##### END LOG #####"
    notify FAULT
    exit 1
  fi
  check_slave_delay "$until_file" "$until_pos"
  return 0
}
# Inspect the last replication errors reported by the slave.
# Globals: MYSQL (read); IO_ERRNO, SQL_ERRNO, IO_ERR, SQL_ERR (written)
# Returns: 0 when the SQL error number is 0 (an I/O error is only logged as a
#          warning). Otherwise, including when the SQL error number cannot be
#          read, the pidfile is removed, FAULT is notified and the script
#          exits with status 1.
function check_replication_errors(){
  local slave_status
  # One query, so all four values describe the same moment.
  slave_status=$($MYSQL -e 'show slave status\G')
  IO_ERRNO=$(sed -n -r 's/.*Last_IO_Errno: ([0-9]+)/\1/p' <<< "$slave_status")
  SQL_ERRNO=$(sed -n -r 's/.*Last_SQL_Errno: ([0-9]+)/\1/p' <<< "$slave_status")
  IO_ERR=$(sed -n -r 's/.*Last_IO_Error: (.*)$/\1/p' <<< "$slave_status")
  SQL_ERR=$(sed -n -r 's/.*Last_SQL_Error: (.*)$/\1/p' <<< "$slave_status")
  if [ "$SQL_ERRNO" = "0" ]; then
    if [ -n "$IO_ERRNO" ] && [ "$IO_ERRNO" != "0" ]; then
      log "WARNING - Master instance unreachable: $IO_ERR"
    fi
  else
    log "instance cannot be aligned: SQL Error: [$SQL_ERRNO] - $SQL_ERR ... ABORTING!"
    pidfile_del
    log "##### END LOG #####"
    notify FAULT
    exit 1
  fi
  return 0
}
# Verify that the slave has executed everything up to the given coordinates.
# Arguments: $1 - expected master log file name
#            $2 - expected master log position
# Globals:   MYSQL (read); EXCUTED_MASTER_LOG_FILE, EXEC_MASTER_LOG_POS (written)
# Returns:   0 when both executed file and position match. Otherwise the
#            replication errors are checked, the pidfile is removed, FAULT is
#            notified and the script exits with status 1.
function check_slave_delay(){
  local expected_file=$1
  local expected_pos=$2
  local slave_status
  slave_status=$($MYSQL -e 'show slave status\G')
  EXCUTED_MASTER_LOG_FILE=$(sed -n -r 's/.*Relay_Master_Log_File: ([^[:space:]]+)/\1/p' <<< "$slave_status")
  EXEC_MASTER_LOG_POS=$(sed -n -r 's/.*Exec_Master_Log_Pos: ([0-9]+)/\1/p' <<< "$slave_status")
  if [ "$EXCUTED_MASTER_LOG_FILE" != "$expected_file" ] || [ "$EXEC_MASTER_LOG_POS" != "$expected_pos" ]; then
    log "Executed replication $EXCUTED_MASTER_LOG_FILE of $expected_file - Executed position $EXEC_MASTER_LOG_POS of $expected_pos"
    # May itself abort the script when a SQL replication error is pending.
    check_replication_errors
    log "ERROR: The replica is not aligned!"
    pidfile_del
    notify FAULT
    exit 1
  fi
  log "Replica completely aligned"
  return 0
}
# Check the state of the two replication threads and verify alignment with
# the given master coordinates.
# Arguments: $1 - master log file name
#            $2 - master log position
# Globals:   MYSQL (read); IO_RUNNING, SQL_RUNNING (written, lower-cased)
function check_slave_status(){
  local master_file=$1
  local master_pos=$2
  local slave_status
  ##### Check if I/O Thread and SQL Thread are both up-and-running if MySQL instance is aligned with Master instance
  slave_status=$($MYSQL -e 'show slave status\G')
  IO_RUNNING=$(sed -n -r 's/.*Slave_IO_Running: ([A-Za-z]+)/\1/p' <<< "$slave_status" | tr '[:upper:]' '[:lower:]')
  SQL_RUNNING=$(sed -n -r 's/.*Slave_SQL_Running: ([A-Za-z]+)/\1/p' <<< "$slave_status" | tr '[:upper:]' '[:lower:]')
  log "SQL Running: $SQL_RUNNING, I/O Running: $IO_RUNNING"
  if [ "$IO_RUNNING" == "yes" ] && [ "$SQL_RUNNING" == "yes" ]; then
    log "SLAVE RUNNING"
    $MYSQL -e "stop slave"
    #start_slave $MASTER_LOG_FILE $MASTER_LOG_POS
    check_slave_delay "$master_file" "$master_pos"
  elif [ "$IO_RUNNING" == "no" ] && [ "$SQL_RUNNING" == "no" ]; then
    log "SLAVE STOPPED ... Attempting slave start"
    start_slave "$master_file" "$master_pos"
    check_slave_delay "$master_file" "$master_pos"
  elif [ "$IO_RUNNING" != "$SQL_RUNNING" ]; then
    log "WARNING - Threads can be uncorrectly initialized: I/O Tread: $IO_RUNNING - SQL Tread: $SQL_RUNNING"
    $MYSQL -e "stop slave"
    start_slave "$master_file" "$master_pos"
    check_slave_delay "$master_file" "$master_pos"
  else
    # Both values are equal but neither "yes" nor "no", e.g. both empty
    # because the status query returned nothing.
    log "WARNING - Slave thread state could not be determined: I/O Thread: '$IO_RUNNING' - SQL Thread: '$SQL_RUNNING'"
  fi
}
# Check that the local MySQL instance answers; when it does not, decide from
# the age of the stop-flag file whether to start it.
# Globals: MYSQLADMIN (read); MYSQL_STOP_FLAG (read, optional path of the flag
#          file, defaults to /tmp/mysql.stop)
# Returns: 0 when the instance is up, or the status of instance_start when
#          the flag file is older than 8 hours. When the flag file is younger
#          than that, or cannot be read, the pidfile is removed, FAULT is
#          notified and the script exits with status 1.
function check_mysql_alive(){
  local flag_file=${MYSQL_STOP_FLAG:-/tmp/mysql.stop}
  local now flag_mtime file_age
  if $MYSQLADMIN ping > /dev/null 2>&1; then
    log "instance UP ... OK!"
    return 0
  fi
  log "instance IS DOWN!"
  ## check if the flag file is older than 8 hours ( 28800 sec )
  ## if it is older, than we will start mysql
  ## if it is younger, we will exit and check on the next loop
  if ! flag_mtime=$(stat -L --format %Y "$flag_file" 2>/dev/null); then
    # NOTE(review): without a readable flag file the age is unknown; the run
    # is failed here rather than starting MySQL - confirm this is the wanted
    # policy.
    log "The flag file ( $flag_file ) cannot be read, its age is unknown."
    log "Exiting waiting the next loop."
    pidfile_del
    notify FAULT
    exit 1
  fi
  now=$(date +%s)
  file_age=$(( now - flag_mtime ))
  if [ "$file_age" -gt 28800 ]; then
    log "The flag file ( $flag_file ) has been created $file_age second ago, so it is is older than 8 hours."
    log "Starting the MySQL instance."
    instance_start
  else
    log "The backup file has been created $file_age second ago, so it is is younger than 8 hours."
    log "Exiting waiting the next loop."
    pidfile_del
    notify FAULT
    exit 1
  fi
}
# Restart the MySQL instance and, when a report host is configured, source
# the report script so it can save the MySQL info.
# Globals: MYSQLADMIN, report_host (read)
function refresh_and_save_mysql_info(){
  ## enable log_slave_update (step currently disabled)
  #log_slave_update=`$MYSQL -B -N -e "select VARIABLE_VALUE from information_schema.GLOBAL_VARIABLES where VARIABLE_NAME = 'log_slave_updates'"`
  #if [ "$log_slave_update" == "OFF" ]
  #then
  #enable log_slave_update
  # sed -i.`date +%Y%m%d_%H%M%S` -e 's/#.*log_slave_updates/log_slave_updates/g' /etc/mysql-$instance.cnf
  #fi
  ## create a new replication_log file
  log "Restarting the MySQL instance."
  $MYSQLADMIN shutdown
  instance_start
  # Nothing more to do when reporting is disabled.
  [ "$report_host" != "NoReport" ] || return 0
  source report_replication_check.sh $report_host save_mysql_info
}
###############################################################################
# Main flow.
#  - Node owning the VIP: make sure the local instance answers, restarting it
#    when needed.
#  - Other node: verify gateway, local instance, VIP reachability and
#    replication alignment, then restart the instance and report readiness.
log "##### STARTING LOG #####"
if [ "$possessed_vip" == "$virtual_ip" ]; then
  log "Server owns VIP $virtual_ip"
  # NOTE(review): $MYSQLADMIN contains the -p password, so this line writes
  # the credential to the log file.
  log "$MYSQLADMIN"
  if $MYSQLADMIN ping > /dev/null 2>&1; then
    log "instance Up"
    log "##### END LOG #####"
    exit 0
  fi
  check_pid_file
  log "instance DOWN! ... Trying to restart instance"
  master_instance_start
  log "##### END #####"
  pidfile_del
  log "##### END LOG #####"
  if [ "$report_host" != "NoReport" ]; then
    # NOTE(review): check_report_slave_delay is not defined in this file.
    report_status=$($R_MYSQL -h "$report_host" -u "$r_user" -p"$r_psw" -e 'show slave status\G')
    REPORT_MASTER_LOG_POS=$(sed -n -r 's/.*Read_Master_Log_Pos: ([0-9]+)/\1/p' <<< "$report_status")
    REPORT_MASTER_LOG_FILE=$(sed -n -r 's/^[[:space:]]*Master_Log_File: ([^[:space:]]+)/\1/p' <<< "$report_status")
    check_report_slave_delay "$REPORT_MASTER_LOG_FILE" "$REPORT_MASTER_LOG_POS"
  fi
else
  log "Server is Fail-Over"
  # NOTE(review): same credential exposure as in the branch above.
  log "$MYSQLADMIN"
  check_pid_file
  ping_gw
  check_mysql_alive
  ping_vip 1
  # Read the slave status once so all values come from the same snapshot.
  slave_status=$($MYSQL -e 'show slave status\G')
  MASTER_HOST=$(sed -n -r 's/.*Master_Host: ([^[:space:]]+)/\1/p' <<< "$slave_status")
  MASTER_PORT=$(sed -n -r 's/.*Master_Port: ([0-9]+)/\1/p' <<< "$slave_status")
  MASTER_LOG_POS=$(sed -n -r 's/.*Read_Master_Log_Pos: ([0-9]+)/\1/p' <<< "$slave_status")
  # Anchoring on leading blanks keeps Relay_Master_Log_File from matching.
  MASTER_LOG_FILE=$(sed -n -r 's/^[[:space:]]*Master_Log_File: ([^[:space:]]+)/\1/p' <<< "$slave_status")
  MY_HOST=$(hostname)
  MY_PORT=$($MYSQL -e 'show variables like "port"\G' | sed -n -r 's/.*Value: ([0-9]+)/\1/p')
  master_status=$($MYSQL -e 'show master status\G')
  MY_LOG_FILE=$(sed -n -r 's/.*File: ([^[:space:]]+)/\1/p' <<< "$master_status")
  MY_LOG_POS=$(sed -n -r 's/.*Position: ([0-9]+)/\1/p' <<< "$master_status")
  # Quoted so that an empty file name cannot shift the position into $1.
  check_slave_status "$MASTER_LOG_FILE" "$MASTER_LOG_POS"
  if [ "$report_host" != "NoReport" ]; then
    # NOTE(review): report_replication_table is not defined in this file.
    report_replication_table "$MY_HOST" "$MY_PORT" "$MY_LOG_FILE" "$MY_LOG_POS" 'no' 0
  fi
  ## refresh binary_logs and save master info
  refresh_and_save_mysql_info
  if [ ${#viface} -gt 16 ]; then
    log "WARNING - Virtual Interface name too long, a maximum of 16 chars can be used, the name will be truncated to $virt_iface"
  fi
  log "OK, If needed this instance can take the IP."
  pidfile_del
  log "##### END LOG #####"
  exit 0
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Record a keepalived state change in the status log.
#
# Positional arguments:
#   $1 - state name
#   $2 - virtual IP the state refers to
STATE=$1
virtual_ip=$2
logfile=/var/log/keepalived_status.log
## FUNCTIONS ##################################################################
# Append one timestamped line to $logfile.
# Arguments: the message words (joined with single spaces)
function log(){
  printf '%s %s\n' "$(date '+%F %T')" "$*" >> "$logfile"
}
log "# $virtual_ip : $STATE"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment