Last active
April 27, 2023 21:32
-
-
Save bhamilton/90182aa335ed3b0d928c8ca649bbc769 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#######################################
# Report whether the binding's Nginx service is running.
# Globals:
#   BTOOL_INFO (read) - text containing "<name>: <value>" lines, one of which
#                       is expected to mention "Nginx".
# Arguments:
#   None
# Returns:
#   0 if the value after the first ":" on the Nginx line is exactly "running"
#   (after whitespace trimming), 1 otherwise (including when no Nginx line is
#   present or BTOOL_INFO is empty/unset).
#######################################
function is_active() {
  local nginx_state

  # xargs with no arguments re-emits its input with surrounding whitespace
  # trimmed, which is all we need it for here.
  nginx_state=$(printf '%s\n' "${BTOOL_INFO}" \
    | awk -F':' '/Nginx/ { print $2 }' \
    | xargs)

  # An empty state can never equal "running", so a single comparison covers
  # both the "no Nginx line" and the "not running" cases.
  [[ "${nginx_state}" == "running" ]]
}
#######################################
# Poll once per second until a path exists or a timeout elapses.
# Arguments:
#   $1 - path to wait for
#   $2 - timeout in whole seconds (optional, default 10, must be >= 1)
# Outputs:
#   Writes 'At least 1 second is required' to stdout for an invalid timeout.
# Returns:
#   0 if the path exists by the time the timeout has elapsed,
#   1 on timeout or when the timeout argument is invalid.
#######################################
function wait_file() {
  local file="$1"
  local wait_seconds="${2:-10}" # 10 seconds as default timeout
  local elapsed

  # Reject anything that is not a positive integer; 10# forces base 10 so a
  # value such as "08" is not parsed as invalid octal.
  if ! [[ "${wait_seconds}" =~ ^[0-9]+$ ]] || (( 10#${wait_seconds} < 1 )); then
    echo 'At least 1 second is required'
    return 1
  fi

  for (( elapsed = 0; elapsed < 10#${wait_seconds}; elapsed++ )); do
    if [[ -e "${file}" ]]; then
      return 0
    fi
    sleep 1
  done

  # Final check after the last sleep, so a file that shows up right at the
  # deadline counts as success instead of being reported as a timeout.
  [[ -e "${file}" ]]
}
#######################################
# Ask the image manager to pull and unpack an image, then wait for the
# unpacked rootfs to appear on disk.
# Globals:
#   ERROR_RESPONSE (written) - set to 11 on missing arguments, 12 when the
#                              unpacked artifact does not appear in time.
# Arguments:
#   $1 - repo,  e.g. "quay.io/getpantheon"
#   $2 - image, e.g. "cos-runtime-php"
#   $3 - tag including its separator, e.g. "@sha256:<digest>"
# Outputs:
#   Progress and error messages on stdout.
# Returns:
#   0 once the unpacked artifact exists, 1 otherwise.
#######################################
function pull_image() {
  local repo=$1
  local image=$2
  local tag=$3
  local imagepath
  local sock
  local response
  local runc_artifact

  if [ $# -lt 3 ]; then
    echo "Missing argument for pull_image <repo> <image> <tag>"
    # Set global error response code to fail job with.
    ERROR_RESPONSE=11
    return 1
  fi

  # Image manager API does a pull, then a unpack.
  echo "Pulling and unpacking image ${image}"
  imagepath="${repo}/${image}${tag}"
  sock="/var/run/cos-image-manager/manager.sock"

  # Capture whatever the image manager (or socat itself) says so that it can
  # be included in the failure message below.
  response=$(/chef-container.sh /bin/bash -c \
    "echo 'unpack ${imagepath}' | socat - UNIX-CONNECT:$sock" 2>&1)
  if [[ -n "${response}" ]]; then
    # Keep the reply visible in the job log, as it was when uncaptured.
    echo "${response}"
  fi

  # Derive the artifact location from the arguments rather than from a
  # hard-coded image name and a global. For the "@sha256:<digest>" form this
  # yields <rootfs>/<repo>/<image>/<digest>.
  # NOTE(review): the on-disk layout for other tag forms is not visible here;
  # confirm before calling this with anything but a sha256 digest.
  runc_artifact="/mnt/disks/data/rootfs/${repo}/${image}/${tag#@sha256:}"

  if wait_file "$runc_artifact" 120; then
    echo "Successfully pulled ${imagepath}"
    return 0
  else
    echo "Error pulling image: ${response}"
    # Set global error response code to fail job with.
    ERROR_RESPONSE=12
    return 1
  fi
}
#######################################
# Verify the binding still answers /pantheon_healthcheck after the PHP image
# changed and, if it does not, revert config.json to the image that was in
# place before and restart PHP.
# Globals:
#   BTOOL_INFO       (read)    - text containing a "Binding URL: <url>" line
#   CONFIG_FILE      (read)    - path of the runc config.json for PHP
#   original_php_tag (read)    - image tag recorded before chef-solo ran
#   binding_id       (read)    - binding to restart
#   ERROR_RESPONSE   (written) - set to 13 when a revert was performed
# Arguments:
#   None
# Outputs:
#   Status messages on stdout, diagnostics on stderr.
#######################################
function healthcheck() {
  local binding_url
  local status
  local current_php_tag
  local reverted_config

  current_php_tag="$(basename "$(sudo cat "${CONFIG_FILE}" | jq -r .root.path)")"
  if [[ "${current_php_tag}" == "${original_php_tag}" ]]; then
    # Bailing out early without performing the /pantheon_healthcheck if a new cos-runtime-php image
    # has not been pushed to prevent the job from emitting noise that is not related to a php update.
    echo "[no-op] The image for cos-runtime-php has not changed since the last chef-solo-bindings run."
    return 0
  fi

  # Only issue the request once we know the image actually changed.
  binding_url=$(echo "$BTOOL_INFO" | grep 'Binding URL' | cut -f2- -d: | xargs)
  # Keep the status code of the last "HTTP/..." line so that intermediate
  # responses cannot turn the value into a multi-line string.
  status=$(curl -ISs "${binding_url}pantheon_healthcheck" \
    | awk '/^HTTP\// { code = $2; sub(/\r$/, "", code) } END { print code }')

  # String comparison on purpose: an empty status (no HTTP response at all)
  # must count as a failed healthcheck. A numeric test would error out on it
  # and skip the remediation.
  if [[ "${status}" != "200" ]]; then
    # The current image tag placed by chef is failing our healthcheck.
    if pull_image "quay.io/getpantheon" "cos-runtime-php" "@sha256:${original_php_tag}"; then
      # Revert to the image that was present before chef-solo ran.
      # Render the new config fully before writing: piping jq straight back
      # into the file it reads from can truncate it before it has been read.
      reverted_config=$(jq \
        --arg current "${current_php_tag}" \
        --arg original "${original_php_tag}" \
        '.root.path |= (sub($current; $original))' "${CONFIG_FILE}")
      if [[ -n "${reverted_config}" ]]; then
        printf '%s\n' "${reverted_config}" > "${CONFIG_FILE}"
      else
        echo "Failed to rewrite ${CONFIG_FILE}; leaving it untouched." >&2
      fi
      # Restart the container to pickup changes.
      /chef-container.sh timeout -k 30s 600s btool -b "${binding_id}" restart php
      # Set global error response code to fail job with.
      ERROR_RESPONSE=13
    fi
  fi
}
echo "$comment"
export TAG=$infra_image_tag

# Start from "success". Without this, a run in which nothing fails reaches the
# final `exit` with an empty value, which bash rejects ("numeric argument
# required") and turns into a failing exit status.
ERROR_RESPONSE=0

# Record previous state, prior to chef-solo run.
CONFIG_FILE="/mnt/disks/data/containers/${binding_id}/runc/php/config.json"
original_php_tag="$(basename "$(sudo cat "${CONFIG_FILE}" | jq -r .root.path)")"

# Merge the contents of /etc/pantheon/settings.json with this job's specific
# JSON values (job values win on key conflicts).
CHEF_FILE="jenkins-chef-${JOB_NAME}-${BUILD_NUMBER}-${RANDOM}.json"
# Remove the generated attributes file on every exit path.
trap 'rm -f "${JENKINS_HOME}/chef/${CHEF_FILE}"' EXIT

# Values are passed with --arg so that jq does the JSON string escaping; quotes
# or backslashes in a job parameter can no longer produce invalid JSON.
# `cat |` is deliberate: if settings.json is unreadable jq still gets (empty)
# input and emits just the job values.
cat /etc/pantheon/settings.json \
  | jq -s \
    --arg binding_id "${binding_id}" \
    --arg job_id "${job_id}" \
    --arg task_id "${task_id}" \
    --arg workflow_id "${workflow_id}" \
    --arg trace_id "${trace_id}" \
    --arg max_bindings "${max_bindings}" \
    'add + {
      "run_list": ["recipe[endpoint::bindings]"],
      "binding_id": $binding_id,
      "job_id": $job_id,
      "task_id": $task_id,
      "workflow_id": $workflow_id,
      "trace_id": $trace_id,
      "max_bindings": $max_bindings
    }' > "${JENKINS_HOME}/chef/${CHEF_FILE}"
cat "${JENKINS_HOME}/chef/${CHEF_FILE}"

# chef-runner sets up the docker run, and takes the arguments we pass to it.
# /chef will be the $JENKINS_HOME/chef dir inside the container.
# Prefix with the TAG env var to control which release to use when running chef.
/chef-container.sh chef-solo --no-fork -j /chef/"$CHEF_FILE" -l debug || ERROR_RESPONSE=$?

# We do not want to trigger socket activation and resurrect bindings to do
# this check. Similarly, explicitly checking for Nginx to ensure this
# remediation only gets applied to appservers.
BTOOL_INFO=$(/chef-container.sh PYTHONWARNINGS=ignore python /usr/local/bin/btool -b "${binding_id}" info)
if is_active; then
  healthcheck
fi

# Error Responses:
# chef: https://github.com/chef/chef/blob/main/docs/dev/design_documents/client_exit_codes.md
#
# 11 - cos-image-manager malformed request
# 12 - cos-image-manager failure pulling/extracting image
# 13 - healthcheck has failed; remediation attempted
exit "$ERROR_RESPONSE"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment