-
-
Save richardkmichael/bed77b51ee6c5c3cc8cc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# | |
# pgpool-II replication manager | |
# | |
# Interfaces with pgpool's pcp command-line tools to provide access to common functions for managing | |
# load-balancing and failover. | |
# | |
# [email protected] | |
# 2011-08-28 | |
# | |
# PCP configuration | |
pcp_host="127.0.0.1" | |
pcp_port="9898" | |
pcp_username="pgpool" | |
pcp_password="password" | |
pcp_timeout="10" | |
# Health check uses psql to connect to each backend server. Specify options required to connect here | |
psql_healthcheck_opts="-U postgres" | |
# Default options to send to pcp commands | |
pcp_cmd_preamble="$pcp_timeout $pcp_host $pcp_port $pcp_username $pcp_password" | |
# | |
# Runs abitrary pcp command with the preamble | |
# | |
_pcp() | |
{ | |
cmd=$1 | |
shift | |
$cmd $pcp_cmd_preamble $* | |
} | |
# | |
# Prints the total number of pgpool nodes. | |
# | |
_get_node_count() | |
{ | |
pcp_node_count $pcp_cmd_preamble | |
} | |
# | |
# Prints out node information for the specified pgpool node | |
# | |
_get_node_info() | |
{ | |
if [ -z $1 ]; then | |
echo "Usage: $0 _get_node_info <node id>" >&2 | |
return 99 | |
fi | |
pcp_node_info $pcp_cmd_preamble $1 | |
} | |
# | |
# Outputs the replication lag (in bytes) between the specified master and slave | |
# | |
_get_replication_lag() | |
{ | |
if [ -z $2 ]; then | |
echo "Usage: $0 _get_replication_lag <master node id> <slave node id>" >&2 | |
return 99 | |
fi | |
# Get connection information for nodes | |
master_node_info=($(_get_node_info $1)) | |
if [ $? -gt 0 ]; then | |
echo "ERROR: Failed getting node info for node $1" >&2 | |
return 1 | |
fi | |
slave_node_info=($(_get_node_info $2)) | |
if [ $? -gt 0 ]; then | |
echo "ERROR: Failed getting node info for node $2" >&2 | |
return 1 | |
fi | |
for n in $1 $2; do | |
if [ $(_is_node_alive $n) -eq 0 ]; then | |
echo "ERROR: Node $n is not available. Unable to calculate lag." >&2 | |
return 1 | |
fi | |
done | |
export PGCONNECT_TIMEOUT=2 | |
# Get xlog locations for master and slaves | |
master_xlog_loc=$(psql $psql_healthcheck_opts -h "${master_node_info[0]}" -p "${master_node_info[1]}" -Atc "SELECT pg_current_xlog_location();") | |
if [ $? -gt 0 ]; then | |
echo "ERROR: Failed getting xlog location from node $1" >&2 | |
return 1 | |
fi | |
slave_xlog_loc=$(psql $psql_healthcheck_opts -h "${slave_node_info[0]}" -p "${slave_node_info[1]}" -Atc "SELECT pg_last_xlog_replay_location();") | |
if [ $? -gt 0 ]; then | |
echo "ERROR: Failed getting xlog location from node $2" >&2 | |
return 1 | |
fi | |
# Number of bytes behind | |
echo $(($(_xlog_to_bytes $master_xlog_loc) - $(_xlog_to_bytes $slave_xlog_loc))) | |
} | |
# | |
# Converts specified xlog number to byte location | |
# | |
_xlog_to_bytes() | |
{ | |
if [ -z $1 ]; then | |
echo "Usage: $0 _xlog_to_bytes <xlog loc>" >&2 | |
echo " Eg.: $0 _xlog_to_bytes 0/2BC825C0" >&2 | |
return 99 | |
fi | |
logid="${1%%/*}" | |
offset="${1##*/}" | |
echo $((0xFFFFFF * 0x$logid + 0x$offset)) | |
} | |
# | |
# Prints whether the postgresql service on the specified node is responding. | |
# | |
_is_node_alive() | |
{ | |
if [ -z $1 ]; then | |
echo "Usage: $0 _is_node_alive <node id>" >&2 | |
return 99 | |
fi | |
# Get node connection information | |
node_info=($(_get_node_info $1)) | |
if [ $? -gt 0 ]; then | |
echo "ERROR: Failed getting node info for node $1" >&2 | |
return 1 | |
fi | |
export PGCONNECT_TIMEOUT=2 | |
result=$(psql $psql_healthcheck_opts -h "${node_info[0]}" -p "${node_info[1]}" -Atc "SELECT 1;" 2>/dev/null) | |
if [ "$result" == "1" ]; then | |
echo 1 | |
return 1 | |
else | |
echo 0 | |
return 0 | |
fi | |
} | |
# | |
# Get the node ID of the first master | |
# | |
_get_master_node() | |
{ | |
# Get total number of nodes | |
nodes=$(_get_node_count) | |
if [ $? -gt 0 ]; then | |
echo "ERROR: Failed getting node count: $nodes" >&2 | |
exit 1 | |
fi | |
c=0 | |
# Loop through each node to check if it's the master | |
while [ $c -lt $nodes ]; do | |
if [ "$(_is_standby $c)" == "0" ]; then | |
echo $c | |
return 0 | |
fi | |
let c=c+1 | |
done | |
echo "-1" | |
return 1 | |
} | |
# | |
# Checks if the node is in postgresql recovery mode (ie. if it is a slave) | |
# | |
_is_standby() | |
{ | |
if [ -z $1 ]; then | |
echo "Usage: $0 _is_standby <node id>" >&2 | |
return 99 | |
fi | |
# Get node connection information | |
node_info=($(_get_node_info $1)) | |
if [ $? -gt 0 ]; then | |
echo "ERROR: Failed getting node info for node $1" >&2 | |
return 1 | |
fi | |
export PGCONNECT_TIMEOUT=2 | |
result=$(psql $psql_healthcheck_opts -h "${node_info[0]}" -p "${node_info[1]}" -Atc "SELECT pg_is_in_recovery();" 2>/dev/null) | |
if [ "$result" == "t" ]; then | |
echo 1 | |
return 1 | |
else | |
echo 0 | |
return 0 | |
fi | |
} | |
# | |
# Attaches the specified node to the pool | |
# | |
attach() | |
{ | |
if [ -z $1 ]; then | |
echo "Usage: $0 attach <node id>" >&2 | |
return 99 | |
fi | |
pcp_attach_node $pcp_cmd_preamble $1 | |
} | |
# | |
# Detaches the specified node from the pool | |
# | |
detach() | |
{ | |
if [ -z $1 ]; then | |
echo "Usage: $0 detach <node id>" >&2 | |
return 99 | |
fi | |
pcp_detach_node $pcp_cmd_preamble $1 | |
} | |
# | |
# Recovers the specified node (restores it from current master and re-attaches) | |
# | |
recover() | |
{ | |
if [ -z $1 ]; then | |
echo "Usage: $0 recover <node id>" >&2 | |
return 99 | |
fi | |
pcp_recovery_node $pcp_cmd_preamble $1 | |
} | |
# | |
# Prints the status of the specified node in human readable format. | |
# | |
_get_node_status() | |
{ | |
if [ -z $1 ]; then | |
echo "Usage: $0 _get_node_status <node id>" >&2 | |
return 99 | |
fi | |
node_info=($(_get_node_info $1)) | |
if [ $? -gt 0 ]; then | |
echo "ERROR: Failed getting node info for node $1" >&2 | |
else | |
node_role="" | |
node_replication_lag="" | |
node_alive="" | |
case "$(_is_node_alive $1)" in | |
1) | |
node_alive="Up" | |
;; | |
*) | |
node_alive="Down" | |
;; | |
esac | |
if [ "$node_alive" == "Up" ]; then | |
# Find out what role this node has | |
if [ "$(_is_standby $1)" == "1" ]; then | |
node_role="Slave" | |
# Calculation replication lag | |
master_node=$(_get_master_node) | |
node_replication_lag=$(_get_replication_lag $master_node $1) | |
if [ $? -eq 0 ]; then | |
node_replication_lag="$node_replication_lag bytes" | |
else | |
node_replication_log="Unknown" | |
fi | |
else | |
node_role="Master" | |
fi | |
fi | |
case "${node_info[2]}" in | |
3) | |
node_status="detached from pool" | |
;; | |
2) | |
node_status="in pool and connected" | |
;; | |
1) | |
node_status="in pool" | |
;; | |
*) | |
node_status="Unknown" | |
;; | |
esac | |
# Print status information about this node | |
echo "Node: $1" | |
echo "Host: ${node_info[0]}" | |
echo "Port: ${node_info[1]}" | |
echo "Weight: ${node_info[3]}" | |
echo "Status: $node_alive, $node_status (${node_info[2]})" | |
[ -n "$node_role" ] && echo "Role: $node_role" | |
[ -n "$node_replication_lag" ] && echo "Replication lag: $node_replication_lag" | |
echo "" | |
fi | |
} | |
# | |
# Prints out the status of all pgpool nodes in human readable form. | |
# | |
status() | |
{ | |
# Get total number of nodes | |
nodes=$(_get_node_count) | |
if [ $? -gt 0 ]; then | |
echo "ERROR: Failed getting node count: $nodes" >&2 | |
exit 1 | |
fi | |
c=0 | |
# Loop through each node to retrieve info | |
while [ $c -lt $nodes ]; do | |
_get_node_status $c | |
let c=c+1 | |
done | |
} | |
# Run function | |
if [ ! "$(type -t $1)" ]; then | |
echo "Usage $0 <option>" >&2 | |
echo "" >&2 | |
echo "Available options:" >&2 | |
echo "$(compgen -A function |grep -v '^_')" >&2 | |
exit 99 | |
else | |
cmd=$1 | |
shift | |
$cmd $* | |
exit $? | |
fi |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment