Last active
April 6, 2024 15:56
-
-
Save quasiben/6f3b1da0a5a7c4ee47c5d06b2e69080a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## start cluster
# Creates the Dataproc cluster. Run from a machine where the gcloud CLI is
# installed and authenticated.
REGION="us-east1"
CLUSTER_NAME="dask-rapids-test"
NUM_GPUS=2     # passed as count= to --worker-accelerator
NUM_WORKERS=2  # passed to --num-workers

# Both initialization-action scripts are read from the bucket whose name ends
# in the region, so the prefix is built once from REGION.
INIT_ACTIONS="gs://goog-dataproc-initialization-actions-${REGION}"

# All metadata keys are passed as one comma-separated --metadata list.
gcloud dataproc clusters create "${CLUSTER_NAME}" \
  --region "${REGION}" \
  --image-version=2.0.0-RC22-ubuntu18 \
  --master-machine-type n1-standard-16 \
  --num-workers "${NUM_WORKERS}" \
  --worker-accelerator "type=nvidia-tesla-t4,count=${NUM_GPUS}" \
  --worker-machine-type n1-highmem-32 \
  --num-worker-local-ssds 4 \
  --initialization-actions "${INIT_ACTIONS}/gpu/install_gpu_driver.sh,${INIT_ACTIONS}/rapids/rapids.sh" \
  --optional-components=JUPYTER,ZEPPELIN \
  --metadata "gpu-driver-provider=NVIDIA,rapids-runtime=DASK,cuda-version=11.0,rapids-version=0.18" \
  --enable-component-gateway \
  --properties="^#^spark:spark.yarn.unmanagedAM.enabled=false"
## Restart Yarn on Master
# Run on the cluster's master node.
sudo systemctl restart hadoop-yarn-resourcemanager.service

## confirm setup on master node
# Runs /usr/bin/nvidia-smi through the YARN distributed-shell application in
# 2 containers, each requesting 3072 MB, 1 vcore and 2 GPUs (yarn.io/gpu=2).
DSHELL_JAR="/usr/lib/hadoop-yarn/hadoop-yarn-applications-distributedshell.jar"
yarn jar "${DSHELL_JAR}" \
  -jar "${DSHELL_JAR}" \
  -shell_command /usr/bin/nvidia-smi \
  -container_resources memory-mb=3072,vcores=1,yarn.io/gpu=2 \
  -num_containers 2

# Example application ID; substitute the ID reported by the distributed-shell
# run above before fetching its logs.
APP_ID="application_1615261846070_0001"
yarn logs -applicationId "${APP_ID}"

## ssh to master node and start skein
skein driver start
```
# gpu-yarn.yml
# Skein application spec with a single service that requests one GPU and
# prints diagnostics (hostname, python path, nvidia-smi output, and the GPUs
# numba.cuda reports) from inside the container.
name: gpu-yarn
queue: default

services:
  dask.scheduler:
    instances: 1
    resources:
      memory: 4 GiB
      vcores: 1
      gpus: 1
    script: |
      echo "HELLO WORLD"
      hostname
      which python
      /usr/bin/nvidia-smi
      /opt/conda/bin/python -c 'import numba.cuda; print(numba.cuda.gpus)'
```
# Submit the application described by gpu-yarn.yml (expected in the current
# directory).
skein application submit gpu-yarn.yml
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment