Skip to content

Instantly share code, notes, and snippets.

@eric-czech
Last active November 13, 2020 22:27
Show Gist options
  • Save eric-czech/892149a9240258f846ab86f7f3e9092b to your computer and use it in GitHub Desktop.
Save eric-czech/892149a9240258f846ab86f7f3e9092b to your computer and use it in GitHub Desktop.
Dask Cloud Provider Usage for GCP
# Dask Cloud Provider REPL
#
# This is useful for creating clusters independent of the code that runs on them
# Example: python scripts/cloudprovider.py -- --interactive
#
from dask_cloudprovider.gcp.instances import GCPCluster
import fire
import os
import json
# Environment variables handed to every cluster VM unless the caller supplies
# its own mapping. Each value is deliberately wrapped in literal double quotes
# so that the whole package list stays a single shell word.
# NOTE(review): presumably the daskdev/dask container image reads these at
# startup to install the extra conda/pip packages -- confirm against the image.
DEFAULT_ENV_VARS = dict(
    EXTRA_CONDA_PACKAGES=(
        '"numba==0.51.2 xarray==0.16.1 gcsfs==0.7.1 '
        'dask-ml==1.7.0 zarr==2.4.0 -c conda-forge"'
    ),
    EXTRA_PIP_PACKAGES=(
        '"git+https://github.com/pystatgen/sgkit.git'
        '@9c24b3c2e7f8d3d40e25ece032d21c59ce696d3a#egg=sgkit"'
    ),
)
class CLI:
    """Fire command group that creates and manages a Dask cluster on GCP.

    The managed cluster is stored in the ``cluster`` attribute (``None`` until
    ``create`` has been called). Every command returns the ``CLI`` instance
    itself so that further commands can be applied to the same object.

    NOTE: there is intentionally no ``cluster()`` accessor method. The
    instance attribute ``cluster`` assigned in ``__init__`` would always
    shadow a method of the same name, so the attribute is the only accessor.
    """

    def __init__(self):
        # No cluster exists until `create` is called.
        self.cluster = None

    def create(
        self,
        n_workers=None,
        env_vars=None,
        name=None,
        **kwargs
    ):
        """Create a ``GCPCluster`` and remember it on this instance.

        Args:
            n_workers: Forwarded to ``GCPCluster`` as ``n_workers``.
            env_vars: Mapping of environment variables forwarded to
                ``GCPCluster``. When omitted (``None``), a copy of the
                module-level ``DEFAULT_ENV_VARS`` is used; pass ``{}`` to
                send no extra variables.
            name: Forwarded to ``GCPCluster`` as ``name``.
            **kwargs: Any further keyword arguments for ``GCPCluster``.

        Returns:
            This ``CLI`` instance.
        """
        if env_vars is None:
            # Copy so that nothing downstream can mutate the shared
            # module-level defaults.
            env_vars = dict(DEFAULT_ENV_VARS)
        self.cluster = GCPCluster(name=name, n_workers=n_workers, env_vars=env_vars, **kwargs)
        return self

    def _validate(self):
        """Raise ``ValueError`` if no cluster has been created yet."""
        if self.cluster is None:
            raise ValueError('Must create cluster first with `create` function')

    def adapt(self, min_workers, max_workers, interval="60s", wait_count=3):
        """Turn on adaptive scaling between ``min_workers`` and ``max_workers``.

        Args:
            min_workers: Passed to ``cluster.adapt`` as ``minimum``.
            max_workers: Passed to ``cluster.adapt`` as ``maximum``.
            interval: Passed through to ``cluster.adapt`` unchanged.
            wait_count: Passed through to ``cluster.adapt`` unchanged.

        Returns:
            This ``CLI`` instance.

        Raises:
            ValueError: If ``create`` has not been called.
        """
        self._validate()
        self.cluster.adapt(minimum=min_workers, maximum=max_workers, interval=interval, wait_count=wait_count)
        return self

    def scale(self, n_workers):
        """Ask the cluster to scale to ``n_workers`` workers.

        Returns:
            This ``CLI`` instance.

        Raises:
            ValueError: If ``create`` has not been called.
        """
        self._validate()
        self.cluster.scale(n_workers)
        return self

    def shutdown(self):
        """Close the cluster.

        Returns:
            This ``CLI`` instance.

        Raises:
            ValueError: If ``create`` has not been called.
        """
        self._validate()
        self.cluster.close()
        return self

    def export_scheduler_info(self, path="/tmp/scheduler-info.txt"):
        """Write the scheduler's name and IP addresses to ``path``.

        The file contains one ``key=value`` pair per line for the keys
        ``hostname``, ``internal_ip`` and ``external_ip``, in that order.

        Args:
            path: Destination file; it is overwritten if it already exists.

        Returns:
            This ``CLI`` instance.

        Raises:
            ValueError: If ``create`` has not been called.
        """
        self._validate()
        scheduler = self.cluster.scheduler
        props = {
            "hostname": scheduler.name,
            "internal_ip": scheduler.internal_ip,
            "external_ip": scheduler.external_ip,
        }
        with open(path, 'w') as f:
            # Terminate every pair with a newline; without it all three
            # pairs run together on one unparseable line.
            f.writelines(f"{k}={v}\n" for k, v in props.items())
        return self
# Script entry point: expose the CLI class as a Fire command-line interface
# (only when the file is executed directly, not when it is imported).
if __name__ == "__main__":
    fire.Fire(component=CLI)
# Dask Cloud Provider settings for GCP, supplied through environment variables.
# Dask's configuration system maps DASK_* variables to nested config keys,
# with a double underscore marking each nesting level (cloudprovider.gcp.*).
#
# GCP_ZONE and GCP_PROJECT must already be set in the calling shell.
# The expansions are quoted so the values are never word-split or globbed,
# including under POSIX shells that do not special-case `export` assignments.
export DASK_CLOUDPROVIDER__GCP__NAME=dask-gwas-1
export DASK_CLOUDPROVIDER__GCP__ZONE="$GCP_ZONE"
export DASK_CLOUDPROVIDER__GCP__PROJECTID="$GCP_PROJECT"
export DASK_CLOUDPROVIDER__GCP__MACHINE_TYPE=n1-standard-8
export DASK_CLOUDPROVIDER__GCP__DOCKER_IMAGE=daskdev/dask:2.30.0
# NOTE(review): presumably keeps the scheduler off the public internet, so the
# client must run inside the same network -- confirm against the
# dask-cloudprovider documentation.
export DASK_CLOUDPROVIDER__GCP__PUBLIC_INGRESS=False
# Conda environment for running the cloudprovider CLI script.
name: cloudprovider
channels:
  - conda-forge
  - bioconda
dependencies:
  - python=3.8
  - dask==2.30.0
  - fire
  - pip
  # Requirements listed under the `pip:` key are installed with pip after the
  # conda packages; they must be nested beneath it to be recognized as such.
  - pip:
      - git+https://github.com/dask/dask-cloudprovider.git@35deeb415e061ca90973fd24e56b1b7a6f54bc16#egg=dask-cloudprovider[gcp]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment