Last active
March 6, 2025 02:13
-
-
Save surajssd/a0596ca7785228f025be5c3ac177219f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fetch the helper scripts and pin them to a fixed commit so the steps below
# run against a known revision.
git clone https://github.com/surajssd/llm-k8s
cd llm-k8s
git checkout 4b4dd8e8521346aa3473175eb0c45b4c7e6e6883

# Load the settings kept in the repository's .env file.
source .env

# These exports come after sourcing .env, so they take precedence over any
# value .env assigned to the same names.
# NOTE(review): presumably read by scripts/deploy-aks.sh - confirm.
export GPU_NODE_COUNT=2
export VM_SIZE="Standard_HB120-16rs_v3"

# Run the deploy-aks.sh steps in order: create the cluster, fetch its
# credentials, add the node pool, then install the network operator.
./scripts/deploy-aks.sh deploy_aks
./scripts/deploy-aks.sh download_aks_credentials
./scripts/deploy-aks.sh add_nodepool
./scripts/deploy-aks.sh install_network_operator
# See how to create a k8s service and expose an app with the new CNI IP
#
# Deploys two "ibtest" pods that each request one shared RDMA device and are
# attached to the secondary network named in the
# k8s.v1.cni.cncf.io/networks annotation.
cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: Deployment
metadata:
  name: ibtest
  namespace: default
spec:
  replicas: 2
  selector:
    matchLabels:
      app: ibtest
  template:
    metadata:
      labels:
        app: ibtest
      annotations:
        # This name should match the IPoIBNetwork object we created earlier.
        k8s.v1.cni.cncf.io/networks: aks-infiniband
    spec:
      containers:
        - name: ibtest
          image: ghcr.io/surajssd/doca-ofed:2.10.0
          securityContext:
            capabilities:
              add: ["IPC_LOCK"]
            privileged: true
          resources:
            requests:
              rdma/rdma_shared_device_a: 1
            limits:
              rdma/rdma_shared_device_a: 1
          # Keep the container idle so tests can be run via kubectl exec.
          command: ["/bin/bash", "-c"]
          args:
            - sleep inf
          ports:
            - containerPort: 19090
      affinity:
        # Never place two ibtest pods on the same node.
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            - labelSelector:
                matchExpressions:
                  - key: app
                    operator: In
                    values:
                      - ibtest
              topologyKey: "kubernetes.io/hostname"
        # Only schedule onto the listed VM sizes. Uses the stable
        # node.kubernetes.io/instance-type label instead of the deprecated
        # beta.kubernetes.io/instance-type.
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: node.kubernetes.io/instance-type
                    operator: In
                    values:
                      - Standard_HB120-16rs_v3
                      - Standard_HB120rs_v3
                      - Standard_ND96asr_v4
EOF
# Print a single JSON map of pod name -> IP on interface "net1" for every Pod
# attached to the secondary network given in --arg network. The value must be
# the same name used in the Pods "k8s.v1.cni.cncf.io/networks" annotation
# (the ibtest Deployment uses "aks-infiniband").
kubectl get pods -A -o json | jq --arg network "aks-infiniband" '
  .items[]
  # 1. Only consider Pods whose annotation "k8s.v1.cni.cncf.io/networks" == $network
  | select(.metadata.annotations["k8s.v1.cni.cncf.io/networks"] == $network)
  # 2. Skip Pods that have no "network-status" annotation; fromjson would
  #    fail on null and abort the whole query.
  | select(.metadata.annotations["k8s.v1.cni.cncf.io/network-status"] != null)
  # 3. Build a small JSON object where the key is the pod name, and the value is
  #    the IP from the "network-status" JSON for interface "net1".
  | {
      (.metadata.name):
        (
          .metadata.annotations["k8s.v1.cni.cncf.io/network-status"]
          | fromjson                          # parse the JSON string into an array
          | map(select(.interface == "net1")) # only the interface == "net1"
          | .[]
          | .ips[]
        )
    }
' | jq -s add
# ^^ 4. Since the first jq emits multiple one-key objects (one per Pod), we use
#       jq again to merge them into a single map.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment