Skip to content

Instantly share code, notes, and snippets.

@jumping
Created September 12, 2025 06:41
Show Gist options
  • Save jumping/8b3712f27399cf32be2123fa7e4d5d5f to your computer and use it in GitHub Desktop.
Save jumping/8b3712f27399cf32be2123fa7e4d5d5f to your computer and use it in GitHub Desktop.
Setup for AWS EKS (ALB, Ingress Nginx, EFS)
#!/usr/bin/env bash
set -euo pipefail
# ==========================================
# Setup for EKS + EFS
# - Requires: kubectl, awscli, helm
# - Preconditions: EKS cluster is ready (nodes Ready), EFS exists and is 'available'
# ==========================================
# Target EFS filesystem id and EKS cluster name; populated from CLI args below.
EFS_ID=""
CLUSTER_NAME=""
# Region default: environment, then the AWS CLI profile; may be overridden by --region.
AWS_REGION="${AWS_REGION:-$(aws configure get region 2>/dev/null || echo "")}"
# Upstream Ingress NGINX manifest (AWS provider flavor).
# FIX: the pasted source pointed at 'raw.githubusercontent.com' (a link-shortener scrape
# artifact); the canonical host is raw.githubusercontent.com.
INGRESS_MANIFEST_URL="https://raw.githubusercontent.com/kubernetes/ingress-nginx/controller-v1.13.2/deploy/static/provider/aws/deploy.yaml"
usage() {
  # Print command-line help (same text the original heredoc produced).
  printf '%s\n' \
    "Usage: $0 --efs-id <fs-xxxxxxxx> [--cluster-name <eks-cluster-name>] [--region <aws-region>]" \
    "Required:" \
    "--efs-id EFS filesystem id (e.g., fs-1234567890abcdef)" \
    "Optional:" \
    "--cluster-name EKS cluster name (if omitted, will try to infer from current kubeconfig)" \
    "--region AWS region (defaults to AWS CLI configured region if available)" \
    "Examples:" \
    "$0 --efs-id fs-0abc123def4567890 --cluster-name my-eks --region ap-southeast-1"
}
# ---------- Parse args ----------
while (( $# > 0 )); do
  case "$1" in
    --efs-id)       shift; EFS_ID="${1:-}" ;;
    --cluster-name) shift; CLUSTER_NAME="${1:-}" ;;
    --region)       shift; AWS_REGION="${1:-}" ;;
    -h|--help)      usage; exit 0 ;;
    *)
      echo "Unknown argument: $1"
      usage
      exit 1
      ;;
  esac
  # '|| true': final shift may run with an empty arg list when a flag was last.
  shift || true
done
# --efs-id is the only mandatory argument.
if [[ -z "${EFS_ID}" ]]; then
  echo "[ERROR] --efs-id is required."
  usage
  exit 1
fi
# ---------- Helpers ----------
require_cmd() {
  # Verify an executable is available on PATH.
  # $1 - command name. Returns 0 when found, 1 when missing.
  if ! command -v "$1" >/dev/null 2>&1; then
    # FIX: diagnostics belong on stderr, not stdout.
    echo "[ERROR] Command '$1' not found." >&2
    return 1
  fi
}
try_install_helm() {
  # Best-effort helm installation for Linux hosts; no-op when helm is present.
  # Returns non-zero only when helm is absent and cannot be auto-installed.
  if command -v helm >/dev/null 2>&1; then
    return 0
  fi
  if [[ "$(uname -s)" != "Linux" ]]; then
    echo "[ERROR] helm is not installed. Please install helm first: https://helm.sh/docs/intro/install/" >&2
    return 1
  fi
  # FIX: the helm installer URL was garbled to 'raw.githubusercontent.com' in the pasted
  # source; the official script lives on raw.githubusercontent.com.
  local helm_script_url="https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3"
  # Detect distro family from os-release (unused ID_LIKE read removed).
  local os_id=""
  if [[ -f /etc/os-release ]]; then
    # shellcheck disable=SC1091
    . /etc/os-release
    os_id="${ID:-}"
  fi
  if [[ "$os_id" == "amzn" ]]; then
    echo "[INFO] Detected Amazon Linux. Installing Helm via official script..."
    # Ensure curl/tar exist (AL2023 uses dnf)
    if ! command -v curl >/dev/null 2>&1; then
      sudo dnf install -y curl tar || sudo yum install -y curl tar || true
    fi
    curl -fsSL "$helm_script_url" | bash
    return 0
  fi
  if command -v dnf >/dev/null 2>&1 || command -v yum >/dev/null 2>&1; then
    echo "[INFO] RPM-based distro detected. Installing Helm via official script..."
    if ! command -v curl >/dev/null 2>&1; then
      sudo dnf install -y curl tar || sudo yum install -y curl tar || true
    fi
    curl -fsSL "$helm_script_url" | bash
    return 0
  fi
  if command -v zypper >/dev/null 2>&1; then
    echo "[INFO] openSUSE/SLES detected. Trying zypper first, then official script..."
    sudo zypper refresh
    sudo zypper install -y helm || { curl -fsSL "$helm_script_url" | bash; }
    return 0
  fi
  if command -v apt-get >/dev/null 2>&1; then
    echo "[INFO] Debian/Ubuntu detected. Installing Helm from the official repo..."
    sudo apt-get update -y
    sudo apt-get install -y apt-transport-https gnupg curl
    curl -fsSL https://baltocdn.com/helm/signing.asc | sudo gpg --dearmor -o /usr/share/keyrings/helm.gpg
    echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/helm.gpg] https://baltocdn.com/helm/stable/debian/ all main" | sudo tee /etc/apt/sources.list.d/helm-stable-debian.list >/dev/null
    sudo apt-get update -y
    sudo apt-get install -y helm
    return 0
  fi
  echo "[INFO] Fallback: Installing Helm via official script..."
  curl -fsSL "$helm_script_url" | bash
  return 0
}
infer_cluster_name_from_kubeconfig() {
  # Derive the EKS cluster name from the active kubeconfig context.
  # Prints the name (or nothing) to stdout; always returns 0.
  local raw_name
  raw_name=$(kubectl config view --minify -o jsonpath='{.clusters[0].name}' 2>/dev/null || echo "")
  if [[ -z "$raw_name" ]]; then
    echo ""
    return 0
  fi
  # EKS contexts are often ARNs: arn:aws:eks:<region>:<acct>:cluster/<name>
  case "$raw_name" in
    arn:aws:eks:*) echo "${raw_name##*/}" ;;
    *)             echo "$raw_name" ;;
  esac
}
wait_for_nodes_ready() {
  # Block until every node reports Ready; abort the whole script otherwise.
  echo "[INFO] Waiting for nodes to be Ready..."
  # Bail out early when the cluster has no registered nodes at all.
  local total
  total=$(kubectl get nodes --no-headers 2>/dev/null | wc -l | tr -d ' ')
  if (( total == 0 )); then
    echo "[ERROR] No nodes found in the cluster. Ensure EKS cluster is ready."
    exit 1
  fi
  if ! kubectl wait --for=condition=Ready nodes --all --timeout=180s; then
    echo "[ERROR] Not all nodes are Ready within timeout."
    exit 1
  fi
  echo "[INFO] Nodes are Ready."
}
check_efs_available() {
  # Ensure the target filesystem (global EFS_ID) is in the 'available'
  # lifecycle state; exit the script otherwise.
  echo "[INFO] Checking EFS $EFS_ID lifecycle state..."
  local lifecycle
  lifecycle=$(aws efs describe-file-systems --file-system-id "$EFS_ID" \
    --query 'FileSystems[0].LifeCycleState' --output text 2>/dev/null || echo "UNKNOWN")
  [[ "$lifecycle" == "available" ]] || {
    echo "[ERROR] EFS ${EFS_ID} is not 'available' (current: ${lifecycle})."
    exit 1
  }
  echo "[INFO] EFS ${EFS_ID} is available."
}
ensure_namespace() {
  # Create namespace $1 if absent; log-only no-op when it already exists.
  local ns="$1"
  if kubectl get ns "$ns" >/dev/null 2>&1; then
    echo "[INFO] Namespace ${ns} already exists."
  else
    echo "[INFO] Creating namespace ${ns}..."
    kubectl create ns "$ns"
  fi
}
ensure_helm_repo() {
  # Register helm repo $1 (at URL $2) unless a repo with that name is present.
  local repo_name="$1" repo_url="$2"
  # Exact-match the first column of 'helm repo list'.
  if helm repo list | awk '{print $1}' | grep -qx "$repo_name"; then
    echo "[INFO] Helm repo '$repo_name' already exists. Updating..."
  else
    echo "[INFO] Adding helm repo '$repo_name' -> $repo_url"
    helm repo add "$repo_name" "$repo_url"
  fi
}
install_alb_controller() {
  # Install or upgrade the AWS Load Balancer Controller helm chart into
  # kube-system. Reads global CLUSTER_NAME.
  # NOTE(review): no serviceAccount/IRSA flags are passed — presumably the
  # chart defaults (or a pre-created SA) are relied on; confirm IRSA setup.
  echo "[INFO] Installing/Upgrading AWS Load Balancer Controller..."
  ensure_helm_repo "eks" "https://aws.github.io/eks-charts"
  helm repo update
  # Minimal flags per step.md
  helm upgrade --install aws-load-balancer-controller eks/aws-load-balancer-controller \
    -n kube-system \
    --set clusterName="${CLUSTER_NAME}"
  # Wait for rollout; '|| true' keeps the script going on rollout timeout.
  kubectl -n kube-system rollout status deployment/aws-load-balancer-controller --timeout=300s || true
}
install_ingress_nginx() {
  # Deploy the upstream Ingress NGINX manifest, annotate its Service so AWS
  # provisions an internet-facing load balancer, and wait for the rollout.
  echo "[INFO] Deploying Ingress NGINX from upstream manifest..."
  local tmpfile
  tmpfile="$(mktemp -t ingress-nginx-XXXXXXXX)"
  # FIX: under 'set -e' a curl/kubectl failure previously skipped the final
  # 'rm -f', leaking the temp manifest. Clean up explicitly on each failure.
  if ! curl -fsSL "${INGRESS_MANIFEST_URL}" -o "${tmpfile}"; then
    rm -f "${tmpfile}"
    echo "[ERROR] Failed to download ${INGRESS_MANIFEST_URL}" >&2
    exit 1
  fi
  if ! kubectl apply -f "${tmpfile}"; then
    rm -f "${tmpfile}"
    echo "[ERROR] Failed to apply Ingress NGINX manifest" >&2
    exit 1
  fi
  rm -f "${tmpfile}"
  echo "[INFO] Annotating Service with internet-facing scheme..."
  kubectl annotate service ingress-nginx-controller \
    -n ingress-nginx \
    "service.beta.kubernetes.io/aws-load-balancer-scheme=internet-facing" \
    --overwrite
  echo "[INFO] Waiting for ingress-nginx-controller to be ready..."
  # Best-effort wait; a timeout here should not abort the remaining steps.
  kubectl -n ingress-nginx rollout status deployment/ingress-nginx-controller --timeout=300s || true
}
install_efs_csi_driver() {
  # Install or upgrade the AWS EFS CSI driver helm chart into kube-system.
  # NOTE(review): serviceAccount.create=false assumes an 'efs-csi-controller-sa'
  # (with IRSA) already exists in kube-system — confirm it is created elsewhere.
  echo "[INFO] Installing/Upgrading AWS EFS CSI Driver..."
  ensure_helm_repo "aws-efs-csi-driver" "https://kubernetes-sigs.github.io/aws-efs-csi-driver/"
  helm repo update
  helm upgrade --install aws-efs-csi-driver \
    --namespace kube-system \
    aws-efs-csi-driver/aws-efs-csi-driver \
    --set "controller.serviceAccount.create=false" \
    --set "controller.serviceAccount.name=efs-csi-controller-sa"
  # Wait for node daemonset; '|| true' keeps the script going on timeout.
  kubectl -n kube-system rollout status daemonset/efs-csi-node --timeout=300s || true
  # Controller may be a deployment
  if kubectl -n kube-system get deploy efs-csi-controller >/dev/null 2>&1; then
    kubectl -n kube-system rollout status deploy/efs-csi-controller --timeout=300s || true
  fi
}
apply_storage_classes() {
  # Create the two EFS StorageClasses:
  #   efs-sc         - for statically provisioned PVs
  #   efs-sc-dynamic - access-point-based dynamic provisioning on ${EFS_ID}
  # FIX: heredoc YAML re-indented — the pasted source had lost all indentation,
  # which makes the nested keys (metadata.name, parameters.*) invalid YAML.
  echo "[INFO] Applying StorageClasses (efs-sc and efs-sc-dynamic)..."
  # efs-sc (static) — quoted delimiter: no shell expansion needed here.
  cat <<'EOF' | kubectl apply -f -
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: efs-sc
provisioner: efs.csi.aws.com
volumeBindingMode: Immediate
allowVolumeExpansion: true
EOF
  # efs-sc-dynamic — unquoted delimiter so ${EFS_ID} expands; '\$' keeps the
  # '${.PVC.name}' template literal for the CSI driver to interpret.
  cat <<EOF | kubectl apply -f -
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: efs-sc-dynamic
provisioner: efs.csi.aws.com
parameters:
  provisioningMode: efs-ap
  fileSystemId: ${EFS_ID}
  directoryPerms: "700"
  basePath: "/dynamic"
  subPathPattern: "cdp/\${.PVC.name}"
volumeBindingMode: Immediate
allowVolumeExpansion: true
EOF
}
apply_static_pv_pvc() {
  # Create a statically provisioned EFS-backed PV and a matching PVC in 'cdp'.
  #   $1 - resource name (used for both PV and PVC)
  #   $2 - capacity request, e.g. "80Gi" (EFS itself is elastic)
  # Reads global EFS_ID. NOTE(review): every PV created here shares the same
  # volumeHandle (the EFS root), so all mounts see the same tree — confirm intended.
  local name="$1"
  local size="$2"
  local sc="efs-sc"
  echo "[INFO] Applying static PV/PVC: ${name} (${size})"
  # PV (cluster-scoped). Heredoc YAML re-indented (indentation lost in paste).
  cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: PersistentVolume
metadata:
  name: ${name}
spec:
  capacity:
    storage: ${size}
  volumeMode: Filesystem
  accessModes:
    - ReadWriteMany
  storageClassName: ${sc}
  persistentVolumeReclaimPolicy: Retain
  csi:
    driver: efs.csi.aws.com
    volumeHandle: ${EFS_ID}
EOF
  # PVC (namespaced: cdp). FIX: pin the claim to its own PV via spec.volumeName;
  # without it, any of the identically-classed claims could bind to any of the
  # four efs-sc PVs (e.g. an 80Gi claim grabbing the 200Gi PV).
  cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: ${name}
  namespace: cdp
spec:
  accessModes:
    - ReadWriteMany
  storageClassName: ${sc}
  volumeName: ${name}
  resources:
    requests:
      storage: ${size}
EOF
}
apply_metrics_server() {
  # Deploy Kubernetes Metrics Server from the upstream release manifest.
  # FIX: the pasted source pointed at 'github.com' (a link-shortener scrape
  # artifact); the canonical host is github.com.
  # NOTE(review): 'latest' is unpinned — consider pinning a release tag for
  # reproducible installs.
  echo "[INFO] Applying Metrics Server..."
  kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml
}
# ---------- Pre-flight checks ----------
# Verify tooling, cluster connectivity, AWS credentials, and EFS state before
# mutating anything.
echo "[INFO] Checking required commands..."
require_cmd kubectl
require_cmd aws
# helm may be auto-installed on Linux; require_cmd afterwards re-verifies it landed.
if ! command -v helm >/dev/null 2>&1; then
  try_install_helm
fi
require_cmd helm
echo "[INFO] Verifying Kubernetes connectivity..."
# 'kubectl version' contacts the API server, so it doubles as a reachability probe.
kubectl version || { echo "[ERROR] kubectl cannot connect to cluster."; exit 1; }
kubectl get ns kube-system >/dev/null 2>&1 || { echo "[ERROR] Unable to query kube-system namespace."; exit 1; }
wait_for_nodes_ready
echo "[INFO] Verifying AWS CLI identity..."
aws sts get-caller-identity >/dev/null 2>&1 || { echo "[ERROR] AWS CLI not authenticated."; exit 1; }
check_efs_available
# Infer cluster name if not provided
if [[ -z "${CLUSTER_NAME}" ]]; then
  CLUSTER_NAME="$(infer_cluster_name_from_kubeconfig || true)"
  if [[ -z "${CLUSTER_NAME}" ]]; then
    echo "[ERROR] --cluster-name not provided and could not infer from kubeconfig."
    exit 1
  fi
  echo "[INFO] Inferred cluster name: ${CLUSTER_NAME}"
fi
if [[ -n "${AWS_REGION}" ]]; then
  # FIX: export the region so subsequent 'aws' calls honor --region; previously
  # the parsed value was only logged and the CLI fell back to its own default.
  export AWS_REGION
  echo "[INFO] AWS Region: ${AWS_REGION}"
fi
# ---------- Execute steps ----------
# Order matters: namespace first (the PVCs land in 'cdp'), controllers next,
# and storage classes before the PV/PVCs that reference them.
ensure_namespace "cdp"
install_alb_controller
install_ingress_nginx
install_efs_csi_driver
apply_storage_classes
# Static PV/PVCs
apply_static_pv_pvc "pvc-log" "80Gi"
apply_static_pv_pvc "pvc-data" "50Gi"
apply_static_pv_pvc "pvc-mount-t-v1" "100Gi"
apply_static_pv_pvc "pvc-dwh" "200Gi"
apply_metrics_server
echo "[INFO] Summary:"
kubectl get sc
echo "----"
kubectl get pv
echo "----"
kubectl get pvc -n cdp
echo "----"
# '|| true': the summary queries are informational and must not fail the script.
kubectl -n kube-system get deploy aws-load-balancer-controller || true
kubectl -n ingress-nginx get deploy ingress-nginx-controller || true
kubectl -n kube-system get ds efs-csi-node || true
echo "[SUCCESS] All steps completed."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment