Skip to content

Instantly share code, notes, and snippets.

@ei-grad
Last active August 15, 2024 06:02
Show Gist options
  • Save ei-grad/7e24b6964988790694c30834c94f1571 to your computer and use it in GitHub Desktop.
Save ei-grad/7e24b6964988790694c30834c94f1571 to your computer and use it in GitHub Desktop.
EKS Bottlerocket - use ephemeral disks to store container images
# Image for the Bottlerocket bootstrap container that runs
# setup-runtime-storage.sh.
FROM public.ecr.aws/amazonlinux/amazonlinux:minimal
# Tools the script calls: mkfs.ext4 (e2fsprogs), mdadm, and
# blkid/mount/findmnt (util-linux).
RUN microdnf -y install e2fsprogs bash mdadm util-linux && microdnf clean all
# COPY is enough for a plain local file (ADD's URL/archive handling is not
# needed). An absolute path keeps the entrypoint independent of the working
# directory the container is started with.
COPY setup-runtime-storage.sh /setup-runtime-storage.sh
ENTRYPOINT ["/bin/bash", "/setup-runtime-storage.sh"]
#!/usr/bin/env bash
# Based on https://github.com/bottlerocket-os/bottlerocket/discussions/1991#discussioncomment-3265188
#
# Combines the host's ephemeral disks into one array, formats and mounts it,
# and relocates the container runtime state directories onto it.
set -ex

# Host root filesystem as seen from inside the bootstrap container.
ROOT_PATH="/.bottlerocket/rootfs"

# Symlinks to ephemeral disks are created here by udev.
# nullglob makes the pattern expand to nothing (instead of staying as a
# literal ".../*" string) when the directory is missing or has no matching
# entries, so the array length is a reliable "are there any disks" check and
# always agrees with the device list used further down.
shopt -s nullglob
declare -a EPHEMERAL_DISKS
EPHEMERAL_DISKS=("${ROOT_PATH}"/dev/disk/ephemeral/*)
shopt -u nullglob

# Exit early if there aren't ephemeral disks
if [ "${#EPHEMERAL_DISKS[@]}" -eq 0 ]; then
  echo "No ephemeral disks found, exiting."
  exit 0
fi
MD_NAME="scratch"
MD_DEVICE="/dev/md/${MD_NAME}"
# Array definition saved by a previous run; its presence (non-empty) is what
# selects "assemble" over "create" below.
MD_CONFIG="/.bottlerocket/bootstrap-containers/current/mdadm.conf"

# Build a RAID-0 array across every discovered ephemeral disk and record its
# definition in MD_CONFIG so that later runs can assemble it instead.
# --run is deliberately not passed, so mdadm's own confirmation step for
# devices that appear to already hold data stays in place.
create_md_array() {
  mdadm --create --force --verbose \
    "${MD_DEVICE}" \
    --level=0 \
    --name="${MD_NAME}" \
    --raid-devices="${#EPHEMERAL_DISKS[@]}" \
    "${EPHEMERAL_DISKS[@]}" || { echo "Failed to create RAID array"; exit 1; }
  mdadm --detail --scan > "${MD_CONFIG}" || { echo "Failed to save RAID configuration"; exit 1; }
}

# Create or assemble the RAID array.
if [ ! -s "${MD_CONFIG}" ]; then
  create_md_array
elif ! mdadm --assemble --config="${MD_CONFIG}" "${MD_DEVICE}"; then
  # A saved config that no longer matches the disks would otherwise make every
  # subsequent run fail here.
  # NOTE(review): presumably this happens when the instance is stopped and
  # started, which replaces the ephemeral disks with blank ones while
  # MD_CONFIG survives - confirm.
  echo "Failed to assemble RAID array from ${MD_CONFIG}, creating a new one"
  create_md_array
fi
# Lay down an ext4 filesystem only when blkid does not already report one on
# the array, so an array that was formatted before is left untouched.
blkid --match-token TYPE=ext4 "${MD_DEVICE}" || {
  if ! mkfs.ext4 "${MD_DEVICE}"; then
    echo "Failed to format ${MD_DEVICE}"
    exit 1
  fi
}
# Where the array is mounted: /mnt/<array name> on the host, addressed through
# the host root visible to this container.
MOUNT_POINT="${ROOT_PATH}/mnt/${MD_NAME}"

if ! mkdir -p "${MOUNT_POINT}"; then
  echo "Failed to create mount point ${MOUNT_POINT}"
  exit 1
fi

if ! mount "${MD_DEVICE}" "${MOUNT_POINT}"; then
  echo "Failed to mount ${MD_DEVICE}"
  exit 1
fi
# Records whether the array's mount under /mnt can be dropped after the loop.
# It becomes "yes" as soon as one state directory is bind-mounted, which
# depends on the version of Bottlerocket.
should_umount="no"

# Point each runtime state directory at the array.
for state_dir in containerd docker kubelet; do
  host_dir="${ROOT_PATH}/var/lib/${state_dir}"
  array_dir="${MOUNT_POINT}/${state_dir}"

  # Which mechanism to use depends on the version of Bottlerocket, inferred
  # from the mounts that are visible to the bootstrap container.
  if findmnt "${host_dir}" > /dev/null; then
    # Bottlerocket >= 1.9.0: bind-mount the directory on the array over the
    # host directory; the mount propagates back to the host.
    if ! mkdir -p "${array_dir}"; then
      echo "Failed to create directory ${array_dir}"
      exit 1
    fi
    if ! mount --rbind "${array_dir}" "${host_dir}"; then
      echo "Failed to bind mount ${array_dir}"
      exit 1
    fi
    if ! mount --make-rshared "${host_dir}"; then
      echo "Failed to make rshared mount ${host_dir}"
      exit 1
    fi
    should_umount="yes"
    continue
  fi

  # Already a symlink: nothing left to do for this directory.
  if [ -L "${host_dir}" ]; then
    continue
  fi

  # Bottlerocket < 1.9.0: the host directory is replaced with a symlink to the
  # directory on the array. This works but can lead to unexpected behavior or
  # incompatibilities, for example, with CSI drivers.
  if [ ! -d "${host_dir}" ]; then
    # No host directory at all, so the target on the array likely needs to be
    # created.
    if ! mkdir -p "${array_dir}"; then
      echo "Failed to create directory ${array_dir}"
      exit 1
    fi
  elif [ -d "${array_dir}" ]; then
    # Downgrade from a version that used bind mounts: the host directory
    # exists but should be empty, except for subdirectories tmpfiles.d may
    # have created before the upgrade. Keep a copy of it just in case.
    rm -rf "${host_dir}.bak"
    if ! mv "${host_dir}" "${host_dir}.bak"; then
      echo "Failed to backup ${host_dir}"
      exit 1
    fi
  else
    # First boot of an older version: move the host directory onto the array.
    if ! mv "${host_dir}" "${array_dir}"; then
      echo "Failed to move ${host_dir} to ${array_dir}"
      exit 1
    fi
  fi

  # The host path is now free, so the symlink can be created. Its target is
  # written as the host sees it (/mnt/...), not as this container does.
  if ! ln -snfT "/mnt/${MD_NAME}/${state_dir}" "${host_dir}"; then
    echo "Failed to create symlink for ${state_dir}"
    exit 1
  fi
done
# With bind mounts in place the array's own mount point is no longer needed.
# Dropping it avoids a second, redundant mount entry under `/mnt` for every
# new mount in one of the state directories.
case "${should_umount}" in
  yes)
    if ! umount "${MOUNT_POINT}"; then
      echo "Failed to unmount ${MOUNT_POINT}"
      exit 1
    fi
    ;;
esac
# Bottlerocket user-data (TOML) that registers the image built from the
# Dockerfile in this gist as a bootstrap container named
# "setup-runtime-storage".
[settings.bootstrap-containers.setup-runtime-storage]
# Placeholder: replace with the repository the image was pushed to.
source = "<PUT_YOUR_ECR_REPO_URL_HERE>:latest"
# NOTE(review): "always" presumably runs the container on every boot - confirm
# against the Bottlerocket settings reference.
mode = "always"
# NOTE(review): presumably makes a failure of this container fail the boot -
# confirm against the Bottlerocket settings reference.
essential = true
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment