Skip to content

Instantly share code, notes, and snippets.

Created August 10, 2016 18:15
Show Gist options
  • Save anonymous/4b74dabf9343ae2d146a60f3c1a0815e to your computer and use it in GitHub Desktop.
Save anonymous/4b74dabf9343ae2d146a60f3c1a0815e to your computer and use it in GitHub Desktop.
postStart hook for mongo replica set in kubernetes
#!/usr/bin/env bash
#
# Script-wide settings for the mongo replica-set postStart hook.

# Port appended to every pod IP when building "<ip>:<port>" addresses.
PORT="27017"

# Label of the mongo pods. It is the TRACK environment variable
# (dev, test or prod in the author's setup) followed by "-mongo".
# Edit according to your setup.
MONGO_LABEL="${TRACK}-mongo"

# Nothing written to stdout is visible while the hook runs, so progress is
# appended to this file; inspect it after the hook has finished.
LOGFILE='/postStart.log'

# Start a fresh log whose first line is the current date.
date > "${LOGFILE}"
#######################################
# Get the pods referenced by the Headless Service.
#
# Asks the Kubernetes API for the endpoints object named MONGO_LABEL in the
# namespace TRACK, authenticating with the service-account token and CA
# certificate under /var/run/secrets/kubernetes.io/serviceaccount.
#
# Globals:   KUBERNETES_SERVICE_HOST, TRACK, MONGO_LABEL, PORT (read)
# Arguments: none
# Outputs:   one "<ip>:<PORT>" line per endpoint address on stdout;
#            nothing when jq yields no addresses
#######################################
getPods() {
  local sa_dir=/var/run/secrets/kubernetes.io/serviceaccount
  local token
  token=$(cat "${sa_dir}/token")

  # jq prints each ip as a quoted JSON string; awk splits on the double
  # quote to take the bare value and appends the port. The port is handed
  # to awk with -v instead of being spliced into the program text.
  curl -s -H "Authorization: Bearer ${token}" \
    --cacert "${sa_dir}/ca.crt" \
    "https://${KUBERNETES_SERVICE_HOST}/api/v1/namespaces/${TRACK}/endpoints/${MONGO_LABEL}" \
    | jq '.subsets[]? .addresses[]? .ip' \
    | awk -F'"' -v port="${PORT}" '{ print $2 ":" port }'
}
#######################################
# Check if the pod is part of a replica set, and output name of master.
#
# Evaluates db.isMaster()['primary'] on the given node and stores whatever
# the mongo shell prints in the caller's variable.
#
# Arguments: $1 - address of the node to query (omitted from the mongo
#                 command line when empty)
#            $2 - name of the variable that receives the output
# Outputs:   nothing on stdout
#######################################
replicaSetMaster() {
  # Deliberately unusual local name so it cannot shadow the caller's
  # result variable named by $2.
  local _rsm_primary
  _rsm_primary=$(mongo ${1:+"$1"} --quiet --eval "db.isMaster()['primary']")
  # printf -v assigns the text verbatim; unlike eval it never executes the
  # shell output, so error messages containing spaces, quotes or ';' are
  # stored as plain data.
  printf -v "$2" '%s' "${_rsm_primary}"
}
#######################################
# List all nodes of the replica set (from master node passed as arg $1).
#
# Evaluates db.isMaster()['hosts'] on the given node and unwraps the
# resulting array into one entry per line.
#
# Arguments: $1 - address of the node to query; when empty no address is
#                 passed to mongo at all
# Outputs:   one host entry per line on stdout
#######################################
replicaSetNodes() {
  # ${1:+"$1"} keeps the address as a single word but still drops it
  # entirely when it is empty.
  mongo ${1:+"$1"} --quiet --eval "db.isMaster()['hosts']" \
    | jq '.[]?' \
    | awk -F'"' '{ print $2 }'
}
#######################################
# Add a node to the replica set by running rs.add() on another node.
#
# Globals:   LOGFILE (appended to)
# Arguments: $1 - address of the node that runs rs.add (omitted from the
#                 mongo command line when empty)
#            $2 - address of the member to add
# Outputs:   a log line plus the mongo shell output, appended to LOGFILE
#######################################
addMember() {
  # LOGFILE is quoted so a path containing whitespace is still a valid
  # redirection target.
  echo "ADDING $2 to the replicaSet" >> "${LOGFILE}"
  mongo ${1:+"$1"} --quiet --eval "rs.add('${2}')" >> "${LOGFILE}"
}
#######################################
# Force a replica-set reconfiguration that keeps only healthy members.
#
# The script sent to the mongo shell reads rs.conf(), collects the names of
# the rs.status() members whose health is > 0, drops every configured member
# whose host is not in that list, and calls rs.reconfig with force:true.
#
# Globals:   LOGFILE (appended to)
# Arguments: $1 - address of the node that runs the reconfiguration
#                 (omitted from the mongo command line when empty)
# Outputs:   a log line plus the mongo shell output, appended to LOGFILE
#######################################
reconfig() {
  # Built piecewise for readability but sent as ONE line, exactly like the
  # original backslash-continued string.
  local js
  js='cfg=rs.conf(); '
  js+='healthy=rs.status().members.filter(function(d){return d.health>0;}).map(function(d){ return d.name;}); '
  js+='cfg.members=cfg.members.filter(function(d){return healthy.indexOf(d.host) >=0;}); '
  js+='rs.reconfig(cfg,{force:true}); '

  echo "RECONFIGURING" >> "${LOGFILE}"
  printf '%s\n' "${js}" | mongo ${1:+"$1"} --quiet >> "${LOGFILE}"
}
#######################################
# Remove a member from the replica set, unless it has only just started.
#
# Asks node $1 for the uptime that rs.status() reports for member $2 and
# forces an rs.remove() only when that uptime is above 120.
#
# Globals:   LOGFILE (appended to)
# Arguments: $1 - address of the node that runs the commands (omitted from
#                 the mongo command line when empty)
#            $2 - name of the member to remove, as listed by rs.status()
# Outputs:   log lines and mongo shell output, appended to LOGFILE
#######################################
removeMember() {
  local raw
  local uptime=0
  # Accepts a bare integer or a one-element array such as "[ 130 ]".
  # NOTE(review): the exact format printed by the mongo shell for the
  # mapped array is assumed, not verified here.
  local re='^\[?[[:space:]]*([0-9]+)[[:space:]]*\]?$'

  raw=$(printf '%s\n' "rs.status()['members'].filter(function(d){return d.name == '$2';}).map(function(d) { return d.uptime;});" \
    | mongo ${1:+"$1"} --quiet)

  # Anything that is not a single integer (empty result, error text) leaves
  # uptime at 0, so the member is treated as freshly started and kept.
  if [[ "${raw}" =~ ${re} ]]; then
    uptime=${BASH_REMATCH[1]}
  fi

  # Numeric comparison: inside [[ ]] the '>' operator compares strings, so
  # "90" or "[ 90 ]" would sort after "120". 10# guards against a leading
  # zero being read as octal.
  if (( 10#${uptime} > 120 )); then
    echo "REMOVING $2 node from replicaSet" >> "${LOGFILE}"
    mongo ${1:+"$1"} --quiet --eval "rs.remove('${2}',{'force':true})" >> "${LOGFILE}"
  else
    # Logged to the file like every other message; stdout is not visible
    # while the hook runs.
    echo "NODE $2 just started" >> "${LOGFILE}"
  fi
}
#######################################
# Determine the current master of the replica set.
#
# Fetches the pod list with getPods, asks every pod which node it considers
# primary, and accepts that answer only when the reported primary is itself
# one of the listed pods. The loop does not stop at the first hit, so the
# last accepted answer wins.
#
# Globals:   MASTER (written; "" when no master was found)
#            LOGFILE (appended to)
# Arguments: none
# Outputs:   a "MASTER IS: ..." line appended to LOGFILE
#######################################
findMaster() {
  # Loop variables are local so they cannot clobber the caller's globals.
  local pods pod primary
  pods=$(getPods)
  MASTER=""

  # Word splitting of the pod list is intended: one address per word.
  # shellcheck disable=SC2086
  for pod in ${pods}; do
    primary=""
    replicaSetMaster "${pod}" primary
    # Check that the master found from checking the node is one of the
    # active pods. "${primary}" is quoted so an empty answer stays in the
    # first argument position instead of shifting the pod list into it.
    if nodeIsInSet "${primary}" ${pods}; then
      MASTER=${primary}
    fi
  done

  echo "MASTER IS: $MASTER" >> "${LOGFILE}"
}
#######################################
# Tell whether a value occurs in a list of values.
#
# Arguments: $1   - value to look for
#            $2.. - values to search (exact string comparison)
# Returns:   0 when $1 equals one of the remaining arguments, 1 otherwise
#######################################
nodeIsInSet() {
  local needle=$1
  local candidate
  # Drop the needle so the remaining positional parameters are the haystack.
  shift
  for candidate; do
    if [[ "${candidate}" == "${needle}" ]]; then
      return 0
    fi
  done
  return 1
}
# ---------------------------------------------------------------------------
# Main flow of the hook.
#
#   1. No pod is registered on the Service yet: this node is the first one,
#      so initialise the replica set on it.
#   2. Pods are registered but none of them reports a usable master:
#      reconfigure until a master shows up, then add this node.
#   3. A master exists: add every registered pod (and this node) that is not
#      yet a member, then reconfigure to drop unhealthy members.
#
# All status messages go to LOGFILE, since stdout is not visible while the
# hook runs.
# ---------------------------------------------------------------------------

# wait a bit for Service to register
sleep 10

THISNODE="$(hostname -i):${PORT}"
echo "THIS NODE IS: $THISNODE" >> "$LOGFILE"

# Get IPs of Pods in the cluster.
# When this is the first node there won't be any, as the pod is only
# registered by the Service once this hook has run.
PODS=$(getPods)
echo -e "$MONGO_LABEL SERVICE REFERENCES: \n$PODS" >> "$LOGFILE"

# Find the master if there is one (findMaster sets MASTER, "" if none).
findMaster

if [[ -z "$PODS" ]]; then
  # no other pods? this node is the first node -> init as master
  echo "NO OTHER POD: INITIALIZING REPLICATION SET ON $THISNODE AS MASTER" >> "$LOGFILE"
  # Need to wait for mongo to get started. curl's exit status is tested
  # directly rather than executing its (empty) output as a command.
  until curl -o /dev/null -s --fail "$THISNODE"; do
    printf '.'
    sleep 1
  done
  mongo "$THISNODE" --quiet --eval "rs.initiate()" >> "$LOGFILE"
elif [[ -z "$MASTER" ]]; then
  # Other pods but no master? we lost the master:
  # reconfigure and wait for master re-election.
  echo "PODS REGISTERED BUT NO MASTER FOUND: RECONFIGURING" >> "$LOGFILE"
  # PODS is a plain string, not an array: take its first entry explicitly.
  read -r FIRST_POD _ <<< "$PODS"
  while [[ -z "$MASTER" ]]; do
    sleep 2
    reconfig "$FIRST_POD"
    findMaster
  done
  addMember "$MASTER" "$THISNODE"
else
  # If we have a master, let's register nodes
  echo "CHECKING NODES TO ADD / REMOVE" >> "$LOGFILE"
  NODES=$(replicaSetNodes "$MASTER")
  echo "NODES IN SET: $NODES" >> "$LOGFILE"
  # Add ourselves to the list of pods in the Service
  PODS+=" ${THISNODE}"
  # Word splitting of PODS / NODES is intended: one address per word.
  # shellcheck disable=SC2086
  for POD in $PODS; do
    if nodeIsInSet "$POD" $NODES; then
      printf '%-22s %s\n' "$POD" "is in the replicaSet" >> "$LOGFILE"
      # Drop this pod from the node list so that only left-over members
      # remain in NODES after the loop. Entries are compared as whole words;
      # a substring deletion could mangle a different address that merely
      # contains this one. Nothing below reads NODES once the loop ends.
      REMAINING=""
      for NODE in $NODES; do
        [[ "$NODE" == "$POD" ]] || REMAINING+="${NODE} "
      done
      NODES=$REMAINING
    else
      addMember "$MASTER" "$POD"
    fi
  done
  echo -e "REMOVE UNHEALTHY NODES: \n" >> "$LOGFILE"
  reconfig "$MASTER"
fi

echo "RESULTING CONFIG" >> "$LOGFILE"
# MASTER is still empty after initialising the first node; replicaSetNodes
# then queries mongo without an explicit address.
replicaSetNodes "$MASTER" >> "$LOGFILE"
exit 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment