Last active
May 22, 2020 03:45
-
-
Save atemate/4bc73f69803b411b4d047a733ee34bc6 to your computer and use it in GitHub Desktop.
Alternative to docker-compose-like yaml format proposed in https://docs.google.com/document/d/1kbdPp_MqaLb9KoKQNyb9V2nRdAReXQn9Mch1IX8GF_M
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
version: 0.1 # version of this yaml format

## We'd store project's setup in hierarchical constants
environment:
  project: # all first-order elements except those like $(.name) simply define constants
    version: v2.0 # for example, this constant can be accessed with $(project.version)
    id: neuro-21dd4a2
    name: my-project
    paths:
      root:
        local: "." # local path used for synchronization
        container: /project # remote path used in synchronization and volume mounts
      shared:
        local: ~/data
        container: /var/storage/shared
    # we can also define shortcuts to the constants:
    root_path: $(project.paths.root.container)
    shared_path: $(project.paths.shared.container)
  images:
    main:
      name: image:$(project.name):$(project.version)
      dockerfile: Dockerfile # if 'dockerfile' defined, will build a new image from it and push as 'name'
      args:
        base_image: neuromation/base:$(project.version)
    deploy:
      name: image:$(project.name)-deploy:$(project.version)
      dockerfile: seldon.Dockerfile
    tensorboard:
      name: tensorflow/tensorflow:latest
    filebrowser:
      name: filebrowser/filebrowser:latest
  volumes: # here we define hierarchical constants:
    project_root: # if we use $(volumes.project_root) below, then
      uri: storage:$(project.name) # the whole subtree will be substituted.
      path: $(project.paths.root.container) # Also, sub-constants can be accessed with $(volumes.project_root.path)
      mode: rw
    shared_data:
      uri: storage:public/$(project.name) # in future will be also s3:// or blob://
      path: $(project.paths.shared.container) # which is "/var/storage/shared"
      mode: ro
## Define template job:
$(.jobs): # this is a template definition -- the one that can be inherited and overloaded
  $(target): # custom target name to be replaced during definition
    image: $(images.main)
    preset: cpu-small # Most of these fields can be overridden during invocation: `neuro-extras run train --preset=gpu-large`
    http: # Empty by default
    http_auth: true
    browse: false
    volumes:
      - $(volumes.project_root)
      - $(volumes.shared_data)
    entrypoint: # 'neuro run --entrypoint'
    cmd:
      type: plain-text # alternatively can use "type: mlflow" to re-use MLProject definitions
      parameters: {} # Similar to MLProject, we can define parameters (see examples below)
      command: # also empty, but neuro will fail if not defined.
    env:
      - name: PYTHONPATH
        value: /var/storage/code
    tags:
      - target:$(.jobs.target) # note that current template's fields can be accessed via $(.jobs)
      - kind:project
      - project:$(project.name)
    life_span: 1d
    postfix: # 'postfix' and 'base_name' are used to form job's name: if postfix not empty,
    base_name: $(project.name)-$(.jobs.target) # then job's name will be "{base_name}-{postfix}"
    # future improvements
    pre_start_hooks:
      # NOTE: the shortcut is defined as 'root_path' (not 'path_root') in the constants above
      - neuro-extras sync-up $(project.root_path) # now we need to sync storage with local project
    restart_policy: Never # our future service jobs
    replicas: 1 # easy run W&B hyper-train: `neuro-extras run train --replicas=3 ...`
    depends_on: # pipeline dependency (type: list)
## Actual jobs definitions
jobs: # inherits template $(.jobs)
  jupyter: # overrides field $(.target)
    image: $(images.main) # defines image
    preset: gpu-small
    http: 8888
    http_auth: true
    browse: true
    cmd: # the main drawback of Makefile is that it does not allow to pass arguments.
      parameters: # So here we define customizable command similar to MLFlow's entrypoint:
        jupyter_mode: {type: string, default: "notebook"} # - example: https://github.com/mlflow/mlflow/blob/master/examples/pytorch/MLproject
        # - syntax: https://mlflow.org/docs/0.2.1/projects.html#specifying-parameters
      command: | # - usage: `-P jupyter_mode=lab`
        jupyter {jupyter_mode} \
          --no-browser \
          --ip=0.0.0.0 \
          --allow-root \
          --NotebookApp.token= \
          --notebook-dir=$(project.root_path)/notebooks
  tensorboard:
    image: $(images.tensorboard)
    preset: cpu-small
    http: 6006
    browse: true
    cmd:
      # recall: we set $(project.root_path) to be the shortcut for $(project.paths.root.container)
      command: tensorboard --host=0.0.0.0 --logdir=$(project.root_path)
  # Example of user-defined targets:
  train:
    preset: gpu-small
    life_span: 0 # disable --life-span
    cmd:
      parameters:
        # parameter name must match the '{previous_version}' placeholder in the command below
        previous_version: {type: string}
        current_version: {type: string, default: v0}
      command: |
        python $(project.root_path)/src/train.py \
          --data-root $(project.shared_path)/coco2017 \
          --previous-version {previous_version} \
          --current-version {current_version} \
          --max-epochs 100
  # example of integration with MLFlow (not --backend=neuro, but as a wrapper of MLProject)
  train-with-mlflow:
    cmd:
      type: mlflow # Will look at `./MLProject` and try to find there entrypoint named "train-with-mlflow"
      # see example: https://github.com/mlflow/mlflow/blob/master/examples/pytorch/MLproject
  # example of building custom image:
  deploy:
    image:
      # 'deploy' is a sibling of 'main' under 'images', so it is addressed as $(images.deploy.name)
      name: $(images.deploy.name)
      dockerfile: seldon-custom.Dockerfile
    preset: cpu-small
    http: 9000
    http_auth: false
    volumes: # to add extra volumes, you need to re-write defaults:
      - $(volumes.project_root)
      - $(volumes.shared_data)
      - model_location:
          uri: storage:public/$(project.name)/models
          path: $(project.shared_path)/models
          mode: ro
    cmd:
      command: seldon-core-microservice seldon_model REST
    restart_policy: Always
## Usage
# alias nn=neuro-extras
# nn run train
# nn run train --preset=gpu-large
# nn run train --preset=gpu-large -P previous_version=v0 -P current_version=v1 # (override job's command parameters)
# nn run train --postfix=new-experiment # runs the job and uses postfix to avoid job name conflict
# nn run train --replicas=3 # runs 3 similar jobs with no name conflict (with postfixes: 1, 2, 3)
# nn kill train
# nn logs train
# nn exec train ls -l
# nn connect train # same as `neuro exec {train-job-name} bash`
# nn ps # lists jobs in current project only
# nn ps --hypertrain # same as `neuro ps --tag target:hypertrain`
# nn kill $(nn ps) # kill all jobs in current project
## Debugging targets:
# nn display run train # prints commands that it would run with `neuro-extras run train`
# nn display run train --http=8080 # same with arguments
## Storage targets:
# neuro-extras sync-up root # same as: `neuro cp -r -u $(project.paths.root.local) $(project.paths.root.job)`
# neuro-extras sync-down shared # same as: `neuro cp -r -u $(project.paths.shared.job) $(project.paths.shared.local)`
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment