Skip to content

Instantly share code, notes, and snippets.

# Turn on autoscaling for the regional managed instance group, driven by the
# custom gpu_utilization gauge metric.
INSTANCE_GROUP_NAME="deeplearning-instance-group"
export INSTANCE_GROUP_NAME

# Flags are collected in an array so that each one can sit on its own line.
autoscaling_flags=(
  # Scale on the custom metric, read as a GAUGE with a target value of 85.
  --custom-metric-utilization
  metric=custom.googleapis.com/gpu_utilization,utilization-target-type=GAUGE,utilization-target=85
  --max-num-replicas 4
  --cool-down-period 360
  --region us-west1
)
gcloud compute instance-groups managed set-autoscaling \
  "${INSTANCE_GROUP_NAME}" "${autoscaling_flags[@]}"
# Fetch gpu-burn, pin it to a known commit, build it and start it in the
# background with its output discarded (presumably to generate GPU load so the
# autoscaler has something to react to -- confirm against the surrounding docs).
git clone https://github.com/wilicc/gpu-burn.git

# Only build and run when we are really inside the checkout at the pinned
# commit; otherwise `make` and `./gpu_burn` would run in whatever directory the
# shell happened to be in.
if cd gpu-burn && git checkout c0b072aa09c360c17a065368294159a6cef59ddf; then
  # Start the burner only if the build succeeded. The braces keep the `&`
  # scoped to gpu_burn alone, so `make` still runs in the foreground.
  make && { ./gpu_burn 600 > /dev/null & }
else
  echo "gpu-burn: could not enter the checkout at the pinned commit; skipping build" >&2
fi
# Create the HTTP health check: it requests /v1/models/default on port 8888.
HEALTH_CHECK_NAME="http-basic-check"
export HEALTH_CHECK_NAME

gcloud compute health-checks create http "${HEALTH_CHECK_NAME}" \
  --request-path /v1/models/default \
  --port 8888
# Create the global HTTP backend service and attach the health check to it.
# Both names are (re-)exported here so the snippet can be run on its own.
HEALTH_CHECK_NAME="http-basic-check"
WEB_BACKED_SERVICE_NAME="tensorflow-backend"
export HEALTH_CHECK_NAME WEB_BACKED_SERVICE_NAME

backend_service_flags=(
  --protocol HTTP
  --health-checks "${HEALTH_CHECK_NAME}"
  --global
)
gcloud compute backend-services create "${WEB_BACKED_SERVICE_NAME}" \
  "${backend_service_flags[@]}"
# Register the us-west1 instance group as a backend of the global backend
# service, balancing on utilization (max 0.8) at full capacity (scaler 1).
INSTANCE_GROUP_NAME="deeplearning-instance-group"
WEB_BACKED_SERVICE_NAME="tensorflow-backend"
export INSTANCE_GROUP_NAME WEB_BACKED_SERVICE_NAME

add_backend_flags=(
  --balancing-mode UTILIZATION
  --max-utilization 0.8
  --capacity-scaler 1
  --instance-group "${INSTANCE_GROUP_NAME}"
  --instance-group-region us-west1
  --global
)
gcloud compute backend-services add-backend "${WEB_BACKED_SERVICE_NAME}" \
  "${add_backend_flags[@]}"
# Create the URL map whose default service is the backend service.
WEB_BACKED_SERVICE_NAME="tensorflow-backend"
WEB_MAP_NAME="map-all"
export WEB_BACKED_SERVICE_NAME WEB_MAP_NAME

gcloud compute url-maps create "${WEB_MAP_NAME}" \
  --default-service "${WEB_BACKED_SERVICE_NAME}"
# Create the target HTTP proxy and point it at the URL map.
WEB_MAP_NAME="map-all"
LB_NAME="tf-lb"
export WEB_MAP_NAME LB_NAME

gcloud compute target-http-proxies create "${LB_NAME}" \
  --url-map "${WEB_MAP_NAME}"
# Reserve a global IPv4 address under the name held in IP4_NAME.
IP4_NAME="lb-ip4"
export IP4_NAME

gcloud compute addresses create "${IP4_NAME}" \
  --ip-version=IPV4 \
  --global
# Create the global forwarding rule that sends port-80 traffic arriving at the
# load balancer's address to the target HTTP proxy.
export IP4_NAME="lb-ip4"
export LB_NAME="tf-lb"
export FORWARDING_RULE="lb-fwd-rule"

# Resolve the address from the global address resource named by IP4_NAME,
# instead of passing the literal placeholder "..." through to --address.
IP="$(gcloud compute addresses describe "$IP4_NAME" --global --format='get(address)')"
export IP

if [ -n "$IP" ]; then
  gcloud compute forwarding-rules create "$FORWARDING_RULE" \
    --address "$IP" \
    --global \
    --target-http-proxy "$LB_NAME" \
    --ports 80
else
  # Without an address the rule would not be bound to the intended IP, so stop
  # here and tell the operator what is missing.
  echo "forwarding rule not created: no address found for '$IP4_NAME'" >&2
fi
#!/bin/bash
# GPU Agent
# Fetch the GPU utilization metrics reporter at a pinned commit, install its
# Python requirements and copy the reporter script to /root.
git clone https://github.com/b0noI/gcp-gpu-utilization-metrics.git

# Install only when we are really inside the checkout at the pinned commit;
# otherwise pip and cp would act on whatever directory the script started in.
if cd gcp-gpu-utilization-metrics \
  && git checkout 6e62ea324bf097817474b51119786e8222dd9fdf; then
  # NOTE(review): "requirenments.txt" is kept exactly as written -- presumably
  # it matches the file name in the pinned commit; confirm before "fixing" it.
  pip install -r ./requirenments.txt
  cp ./report_gpu_metrics.py /root/report_gpu_metrics.py
else
  echo "GPU agent: could not enter the checkout at the pinned commit; agent files not installed" >&2
fi
cat <<-EOH > /lib/systemd/system/gpu_utilization_agent.service