# An alternative to the docker-compose-like yaml format proposed in https://docs.google.com/document/d/1kbdPp_MqaLb9KoKQNyb9V2nRdAReXQn9Mch1IX8GF_M
version: 0.1  # version of this yaml format

## We'd store the project's setup in hierarchical constants
environment:
  project:  # all first-order elements except those like $(.name) simply define constants
    version: v2.0  # for example, this constant can be accessed as $(project.version)
    id: neuro-21dd4a2
    name: my-project
    paths:
      root:
        local: "."  # local path used for synchronization
        container: /project  # remote path used in synchronization and volume mounts
      shared:
        local: ~/data
        container: /var/storage/shared
    # we can also define shortcuts to the constants:
    root_path: $(project.paths.root.container)
    shared_path: $(project.paths.shared.container)
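    # For illustration (resolution semantics as assumed in this proposal): with the
    # values above, $(project.root_path) resolves to "/project", so a reference like
    # "$(project.root_path)/notebooks" expands to "/project/notebooks".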
  images:
    main:
      name: image:$(project.name):$(project.version)
      dockerfile: Dockerfile  # if 'dockerfile' is defined, a new image will be built from it and pushed as 'name'
      args:
        base_image: neuromation/base:$(project.version)
    deploy:
      name: image:$(project.name)-deploy:$(project.version)
      dockerfile: seldon.Dockerfile
    tensorboard:
      name: tensorflow/tensorflow:latest
    filebrowser:
      name: filebrowser/filebrowser:latest
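    # For illustration (assumed build behavior, by analogy with docker-compose):
    # resolving $(images.main) would be roughly equivalent to
    #   docker build -f Dockerfile --build-arg base_image=neuromation/base:v2.0 -t image:my-project:v2.0 .
    # followed by pushing 'image:my-project:v2.0' to the platform registry.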
  volumes:  # here we define hierarchical constants:
    project_root:  # if we use $(volumes.project_root) below,
      uri: storage:$(project.name)  # then the whole subtree will be substituted.
      path: $(project.paths.root.container)  # sub-constants can also be accessed, e.g. $(volumes.project_root.path)
      mode: rw
    shared_data:
      uri: storage:public/$(project.name)  # in the future, this can also be s3:// or blob://
      path: $(project.paths.shared.container)  # which is "/var/storage/shared"
      mode: ro
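    # For illustration: a job that lists "- $(volumes.project_root)" under its
    # 'volumes' key gets the whole subtree substituted, i.e. it is equivalent to:
    #   volumes:
    #     - uri: storage:my-project
    #       path: /project
    #       mode: rw
    # whereas $(volumes.project_root.path) substitutes only the scalar "/project".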
## Define the template job:
$(.jobs):  # this is a template definition -- one that can be inherited and overridden
  $(target):  # custom target name, replaced at definition time
    image: $(images.main)
    preset: cpu-small  # most of these fields can be overridden at invocation time: `neuro-extras run train --preset=gpu-large`
    http:  # empty by default
    http_auth: true
    browse: false
    volumes:
      - $(volumes.project_root)
      - $(volumes.shared_data)
    entrypoint:  # 'neuro run --entrypoint'
    cmd:
      type: plain-text  # alternatively, use "type: mlflow" to re-use MLproject definitions
      parameters: {}  # similar to MLproject, we can define parameters (see examples below)
      command:  # also empty, but neuro will fail if it is not defined
    env:
      - name: PYTHONPATH
        value: /var/storage/code
    tags:
      - target:$(.jobs.target)  # note that the current template's fields can be accessed via $(.jobs)
      - kind:project
      - project:$(project.name)
    life_span: 1d
    postfix:  # 'postfix' and 'base_name' are used to form the job's name:
    base_name: $(project.name)-$(.jobs.target)  # if 'postfix' is not empty, the job's name will be "{base_name}-{postfix}"
    # future improvements:
    pre_start_hooks:
      - neuro-extras sync-up $(project.root_path)  # we still need to sync storage with the local project
    restart_policy: Never  # for our future service jobs
    replicas: 1  # easy W&B hyper-parameter training runs: `neuro-extras run train --replicas=3 ...`
    depends_on:  # pipeline dependency (type: list)
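    # For illustration (assumed inheritance semantics): each entry under 'jobs:'
    # below starts from a deep copy of this template with $(target) replaced by
    # the entry's key; fields the entry re-defines (e.g. 'preset', 'http', 'cmd')
    # override the template's values, while untouched fields ('env', 'tags',
    # 'life_span', ...) are inherited as-is.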
## Actual job definitions
jobs:  # inherits the template $(.jobs)
  jupyter:  # overrides the field $(.target)
    image: $(images.main)  # defines the image
    preset: gpu-small
    http: 8888
    http_auth: true
    browse: true
    cmd:  # the main drawback of a Makefile is that it does not allow passing arguments,
      parameters:  # so here we define a customizable command, similar to MLflow's entry points:
        # - example: https://github.com/mlflow/mlflow/blob/master/examples/pytorch/MLproject
        # - syntax: https://mlflow.org/docs/0.2.1/projects.html#specifying-parameters
        # - usage: `-P jupyter_mode=lab`
        jupyter_mode: {type: string, default: "notebook"}
      command: |
        jupyter {jupyter_mode} \
          --no-browser \
          --ip=0.0.0.0 \
          --allow-root \
          --NotebookApp.token= \
          --notebook-dir=$(project.root_path)/notebooks
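    # For illustration: `neuro-extras run jupyter -P jupyter_mode=lab` would
    # substitute {jupyter_mode} above and start `jupyter lab --no-browser ...`;
    # without -P, the default "notebook" is used.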
  tensorboard:
    image: $(images.tensorboard)
    preset: cpu-small
    http: 6006
    browse: true
    cmd:
      # recall: $(project.root_path) is a shortcut for $(project.paths.root.container)
      command: tensorboard --host=0.0.0.0 --logdir=$(project.root_path)
  # Example of user-defined targets:
  train:
    preset: gpu-small
    life_span: 0  # disable --life-span
    cmd:
      parameters:
        previous_version: {type: string}
        current_version: {type: string, default: v0}
      command: |
        python $(project.root_path)/src/train.py \
          --data-root $(project.shared_path)/coco2017 \
          --previous-version {previous_version} \
          --current-version {current_version} \
          --max-epochs 100
  # example of integration with MLflow (not `--backend=neuro`, but as a wrapper around an MLproject)
  train-with-mlflow:
    cmd:
      type: mlflow  # will look at `./MLProject` and try to find an entry point named "train-with-mlflow" there
      # see example: https://github.com/mlflow/mlflow/blob/master/examples/pytorch/MLproject
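  # For illustration, a hypothetical `./MLProject` file this target could map to
  # (standard MLproject syntax; the parameter and script names are made up):
  #   name: my-project
  #   entry_points:
  #     train-with-mlflow:
  #       parameters:
  #         previous_version: {type: string}
  #       command: "python src/train.py --previous-version {previous_version}"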
  # example of building a custom image:
  deploy:
    image:
      name: $(images.deploy.name)
      dockerfile: seldon-custom.Dockerfile
    preset: cpu-small
    http: 9000
    http_auth: false
    volumes:  # to add extra volumes, you need to re-write the defaults:
      - $(volumes.project_root)
      - $(volumes.shared_data)
      - model_location:
          uri: storage:public/$(project.name)/models
          path: $(project.shared_path)/models
          mode: ro
    cmd:
      command: seldon-core-microservice seldon_model REST
    restart_policy: Always
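  # For illustration (assumed Seldon REST contract, not part of this format):
  # once the job is up, the model could be queried with something like
  #   curl -X POST http://<job-host>:9000/predict \
  #     -H 'Content-Type: application/json' \
  #     -d '{"data": {"ndarray": [[1.0, 2.0]]}}'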
## Usage
# alias nn=neuro-extras
# nn run train
# nn run train --preset=gpu-large
# nn run train --preset=gpu-large -P previous_version=v0 -P current_version=v1  # override the job's command parameters
# nn run train --postfix=new-experiment  # runs the job, using the postfix to avoid a job-name conflict
# nn run train --replicas=3  # runs 3 similar jobs with no name conflict (with postfixes: 1, 2, 3)
# nn kill train
# nn logs train
# nn exec train ls -l
# nn connect train  # same as `neuro exec {train-job-name} bash`
# nn ps  # lists jobs in the current project only
# nn ps --hypertrain  # same as `neuro ps --tag target:hypertrain`
# nn kill $(nn ps)  # kill all jobs in the current project
## Debugging targets:
# nn display run train  # prints the commands that `neuro-extras run train` would execute
# nn display run train --http=8080  # same, with arguments
## Storage targets:
# neuro-extras sync-up root  # same as: `neuro cp -r -u $(project.paths.root.local) $(project.paths.root.job)`
# neuro-extras sync-down shared  # same as: `neuro cp -r -u $(project.paths.shared.job) $(project.paths.shared.local)`
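# For illustration, with the constants above (and assuming 'sync-up' resolves the
# storage side from $(volumes.project_root.uri)), `neuro-extras sync-up root`
# would expand to roughly:
#   neuro cp -r -u . storage:my-project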