Created
August 10, 2016 18:15
-
-
Save anonymous/4b74dabf9343ae2d146a60f3c1a0815e to your computer and use it in GitHub Desktop.
postStart hook for a MongoDB replica set in Kubernetes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash

# Port appended to every pod IP / hostname to address its mongod.
PORT=27017

# This is the label for the mongo pods; getPods also uses it as the name of
# the Endpoints object it queries.
# In my case it is prefixed by the TRACK variable (dev, test or prod);
# edit according to your setup.
MONGO_LABEL="${TRACK}-mongo"

# We log to a file because there is no visible stdout/stderr when the hook
# runs. Check this file to see what happened once the hook has run.
LOGFILE="/postStart.log"

# Start a fresh log for this run, stamped with the current date.
date > "$LOGFILE"
# Get the pods referenced by the Headless Service.
# Queries the Kubernetes API for the Endpoints object named $MONGO_LABEL in
# namespace $TRACK, authenticating with the pod's service-account token.
# Globals:   KUBERNETES_SERVICE_HOST, TRACK, MONGO_LABEL, PORT (all read)
# Outputs:   one "<ip>:<PORT>" line per listed address on stdout; nothing
#            when the response contains no addresses
function getPods() {
  local sa_dir=/var/run/secrets/kubernetes.io/serviceaccount
  local token
  token=$(cat "${sa_dir}/token")

  # -r emits raw strings (no JSON quotes); "// empty" skips null/missing ips
  # so they never turn into a bogus ":<port>" entry.
  curl -s -H "Authorization: Bearer ${token}" \
    --cacert "${sa_dir}/ca.crt" \
    "https://${KUBERNETES_SERVICE_HOST}/api/v1/namespaces/${TRACK}/endpoints/${MONGO_LABEL}" \
    | jq -r '.subsets[]?.addresses[]?.ip // empty' \
    | awk -v port="$PORT" '{ print $0 ":" port }'
}
# Check if the pod is part of a replica set, and output the name of its master.
# Arguments: $1 - host:port of the mongod to query
#            $2 - name of the caller's variable that receives the value
#                 printed by db.isMaster()['primary'] (empty string when
#                 mongo prints nothing)
function replicaSetMaster() {
  local _rsm_primary
  _rsm_primary=$(mongo "$1" --quiet --eval "db.isMaster()['primary']")
  # Assign verbatim with printf -v rather than eval, so output containing
  # spaces or shell metacharacters (e.g. a connection error message) is
  # stored as text and never executed.
  printf -v "$2" '%s' "$_rsm_primary"
}
# List all nodes of the replica set (from master node passed as arg $1).
# Arguments: $1 - host:port of the node to query; when empty or missing, no
#                 address is passed and mongo uses its own default target
# Outputs:   one member address per line on stdout, taken from
#            db.isMaster()['hosts']
function replicaSetNodes() {
  # ${1:+"$1"} drops the argument entirely when it is empty instead of
  # handing mongo an empty string as its connection target.
  mongo ${1:+"$1"} --quiet --eval "db.isMaster()['hosts']" \
    | jq -r '.[]? // empty'
}
# Add a node to the replica set by running rs.add() on the given node.
# Globals:   LOGFILE (appended to)
# Arguments: $1 - host:port of the node to run rs.add() on (the master)
#            $2 - host:port of the member to add
# Outputs:   a log line plus mongo's output, both appended to $LOGFILE
function addMember() {
  echo "ADDING $2 to the replicaSet" >> "$LOGFILE"
  mongo "$1" --quiet --eval "rs.add('${2}')" >> "$LOGFILE"
}
# Force a reconfiguration of the replica set that keeps only the members
# rs.status() reports with health > 0.
# Globals:   LOGFILE (appended to)
# Arguments: $1 - host:port of the node to run the reconfiguration on
# Outputs:   a log line plus mongo's output, both appended to $LOGFILE
function reconfig() {
  echo "RECONFIGURING" >> "$LOGFILE"
  # Quoted here-doc: the JavaScript reaches the mongo shell untouched by the
  # shell (no expansion, no continuation-line juggling).
  mongo "$1" --quiet >> "$LOGFILE" <<'EOF'
cfg=rs.conf();
healthy=rs.status().members.filter(function(d){return d.health>0;}).map(function(d){ return d.name;});
cfg.members=cfg.members.filter(function(d){return healthy.indexOf(d.host) >=0;});
rs.reconfig(cfg,{force:true});
EOF
}
# Remove a member from the replica set, but only once rs.status() reports it
# has been up for more than 120 seconds; younger nodes are left alone.
# Globals:   LOGFILE (appended to)
# Arguments: $1 - host:port of the node to run the commands on
#            $2 - name (host:port) of the member to remove
# Outputs:   log lines and mongo's output, appended to $LOGFILE
function removeMember() {
  local raw uptime=0
  local digits='([0-9]+)'
  raw=$(mongo "$1" --quiet <<<"rs.status()['members'].filter(function(d){return d.name == '$2';}).map(function(d) { return d.uptime;});")

  # The query prints an array such as "[ 345 ]". Take the first run of digits
  # and compare it numerically: a string comparison with [[ > ]] would order
  # "[ 45 ]" after "120". No number at all (member not found) counts as 0.
  # NOTE(review): assumes --quiet output has no digits ahead of the array.
  if [[ "$raw" =~ $digits ]]; then
    uptime=$((10#${BASH_REMATCH[1]}))
  fi

  if (( uptime > 120 )); then
    echo "REMOVING $2 node from replicaSet" >> "$LOGFILE"
    mongo "$1" --quiet --eval "rs.remove('${2}',{'force':true})" >> "$LOGFILE"
  else
    # Logged to the file like everything else: stdout is not visible when
    # running as a hook.
    echo "NODE $2 just started" >> "$LOGFILE"
  fi
}
# Determine the current master of the replica set and store it in MASTER.
# Every pod returned by getPods is asked for its primary; an answer is only
# accepted when it is itself one of those pods. If several pods give an
# accepted answer, the last one wins.
# Globals:   MASTER (written; empty when no master was found),
#            LOGFILE (appended to)
function findMaster() {
  local -a pods
  local pod primary
  # Word splitting is intended: getPods prints whitespace-separated host:port.
  # shellcheck disable=SC2207
  pods=( $(getPods) )

  MASTER=""
  for pod in "${pods[@]}"; do
    primary=""
    replicaSetMaster "$pod" primary
    # Check that the master reported by this node is one of the active pods.
    # An empty answer is skipped explicitly so it can never match anything.
    if [[ -n "$primary" ]] && nodeIsInSet "$primary" "${pods[@]}"; then
      MASTER=$primary
    fi
  done
  echo "MASTER IS: $MASTER" >> "$LOGFILE"
}
# Test whether a node appears in a list of nodes (exact string match).
# Arguments: $1  - node to look for
#            $2… - the nodes to search
# Returns:   0 when $1 equals one of the remaining arguments, 1 otherwise
function nodeIsInSet() {
  local wanted=$1
  shift
  local candidate
  for candidate in "$@"; do
    if [[ "$candidate" == "$wanted" ]]; then
      return 0
    fi
  done
  return 1
}
# wait a bit for Service to register
sleep 10

THISNODE="$(hostname -i):${PORT}"
echo "THIS NODE IS: $THISNODE" >> "$LOGFILE"

# Get IPs of Pods in the cluster.
# For the first node there won't be any, as the pod is only registered by the
# Service once this hook has run.
PODS=$(getPods)
echo -e "$MONGO_LABEL SERVICE REFERENCES: \n$PODS" >> "$LOGFILE"

# Same list as an array, for exact per-pod handling below.
# Word splitting is intended: PODS holds whitespace-separated host:port.
# shellcheck disable=SC2206
POD_LIST=( $PODS )

# Find the master if there is one
findMaster

if [[ -z "$PODS" ]]; then
  # no other pods? this node is the first node -> init as master
  echo "NO OTHER POD: INITIALIZING REPLICATION SET ON $THISNODE AS MASTER" >> "$LOGFILE"
  # need to wait for mongo to get started
  until curl -o /dev/null -s --fail "$THISNODE"; do
    printf '.'
    sleep 1
  done
  mongo "$THISNODE" --quiet --eval "rs.initiate()" >> "$LOGFILE"
elif [[ -z "$MASTER" ]]; then
  # Other pods but no master? we lost the master:
  # reconfigure (via the first registered pod) and wait for re-election.
  # Logged to the file: stdout is not visible when running as a hook.
  echo "PODS REGISTERED BUT NO MASTER FOUND: RECONFIGURING" >> "$LOGFILE"
  while [[ -z "$MASTER" ]]; do
    sleep 2
    reconfig "${POD_LIST[0]}"
    findMaster
  done
  addMember "$MASTER" "$THISNODE"
else
  # If we have a master, let's register nodes
  echo "CHECKING NODES TO ADD / REMOVE" >> "$LOGFILE"
  # Word splitting is intended: one host:port per word.
  # shellcheck disable=SC2207
  NODES=( $(replicaSetNodes "$MASTER") )
  echo "NODES IN SET: ${NODES[*]}" >> "$LOGFILE"

  # Add ourselves to the list of pods in the Service
  POD_LIST+=( "$THISNODE" )
  for POD in "${POD_LIST[@]}"; do
    if nodeIsInSet "$POD" "${NODES[@]}"; then
      printf '%-22s %s\n' "$POD" "is in the replicaSet" >> "$LOGFILE"
    else
      addMember "$MASTER" "$POD"
      # Remember the addition so a pod listed twice is only added once.
      NODES+=( "$POD" )
    fi
  done

  # Members that are no longer healthy are dropped by the forced reconfig,
  # which keeps only members whose health is > 0.
  echo -e "REMOVE UNHEALTHY NODES: \n" >> "$LOGFILE"
  reconfig "$MASTER"
fi

echo "RESULTING CONFIG" >> "$LOGFILE"
# MASTER is still empty when this node has just initiated the set; query
# this node in that case.
replicaSetNodes "${MASTER:-$THISNODE}" >> "$LOGFILE"
exit 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment