Last active
May 22, 2020 03:45
-
-
Save atemate/4bc73f69803b411b4d047a733ee34bc6 to your computer and use it in GitHub Desktop.
Alternative to docker-compose-like yaml format proposed in https://docs.google.com/document/d/1kbdPp_MqaLb9KoKQNyb9V2nRdAReXQn9Mch1IX8GF_M
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
version: 0.1 # version of this yaml format

## We'd store project's setup in hierarchical constants
environment:
  project: # all first-order elements except those like $(.name) simply define constants
    version: v2.0 # for example, this constant can be accessed with $(project.version)
    id: neuro-21dd4a2
    name: my-project
    paths:
      root:
        local: "." # local path used for synchronization
        container: /project # remote path used in synchronization and volume mounts
      shared:
        local: ~/data
        container: /var/storage/shared
    # we can also define shortcuts to the constants:
    root_path: $(project.paths.root.container)
    shared_path: $(project.paths.shared.container)
  images:
    main:
      name: image:$(project.name):$(project.version)
      dockerfile: Dockerfile # if 'dockerfile' defined, will build a new image from it and push as 'name'
      args:
        base_image: neuromation/base:$(project.version)
    deploy:
      name: image:$(project.name)-deploy:$(project.version)
      dockerfile: seldon.Dockerfile
    tensorboard:
      name: tensorflow/tensorflow:latest
    filebrowser:
      name: filebrowser/filebrowser:latest
  volumes: # here we define hierarchical constants:
    project_root: # if we use $(volumes.project_root) below, then
      uri: storage:$(project.name) # the whole subtree will be substituted.
      path: $(project.paths.root.container) # Also, sub-constants can be accessed with $(volumes.project_root.path)
      mode: rw
    shared_data:
      uri: storage:public/$(project.name) # in future will be also s3:// or blob://
      path: $(project.paths.shared.container) # which is "/var/storage/shared"
      mode: ro
## Define template job:
$(.jobs): # this is a template definition -- the one that can be inherited and overloaded
  $(target): # custom target name to be replaced during definition
    image: $(images.main)
    preset: cpu-small # Most of these fields can be overridden during invocation: `neuro-extras run train --preset=gpu-large`
    http: # Empty by default
    http_auth: true
    browse: false
    volumes:
      - $(volumes.project_root)
      - $(volumes.shared_data)
    entrypoint: # 'neuro run --entrypoint'
    cmd:
      type: plain-text # alternatively can use "type: mlflow" to re-use MLProject definitions
      parameters: {} # Similar to MLProject, we can define parameters (see examples below)
      command: # also empty, but neuro will fail if not defined.
    env:
      - name: PYTHONPATH
        value: /var/storage/code
    tags:
      - target:$(.jobs.target) # note that current template's fields can be accessed via $(.jobs)
      - kind:project
      - project:$(project.name)
    life_span: 1d
    postfix: # 'postfix' and 'base_name' are used to form job's name: if postfix not empty,
    base_name: $(project.name)-$(.jobs.target) # then job's name will be "{base_name}-{postfix}"
    # future improvements
    pre_start_hooks:
      # NOTE: the shortcut is defined as 'root_path' (not 'path_root') in the constants above
      - neuro-extras sync-up $(project.root_path) # now we need to sync storage with local project
    restart_policy: Never # our future service jobs
    replicas: 1 # easy run W&B hyper-train: `neuro-extras run train --replicas=3 ...`
    depends_on: # pipeline dependency (type: list)
## Actual jobs definitions
jobs: # inherits template $(.jobs)
  jupyter: # overrides field $(.target)
    image: $(images.main) # defines image
    preset: gpu-small
    http: 8888
    http_auth: true
    browse: true
    cmd: # the main drawback of Makefile is that it does not allow to pass arguments.
      parameters: # So here we define customizable command similar to MLFlow's entrypoint:
        jupyter_mode: {type: string, default: "notebook"} # - example: https://github.com/mlflow/mlflow/blob/master/examples/pytorch/MLproject
        # - syntax: https://mlflow.org/docs/0.2.1/projects.html#specifying-parameters
      command: | # - usage: `-P jupyter_mode=lab`
        jupyter {jupyter_mode} \
          --no-browser \
          --ip=0.0.0.0 \
          --allow-root \
          --NotebookApp.token= \
          --notebook-dir=$(project.root_path)/notebooks
  tensorboard:
    image: $(images.tensorboard)
    preset: cpu-small
    http: 6006
    browse: true
    cmd:
      # recall: we set $(project.root_path) to be the shortcut for $(project.paths.root.container)
      command: tensorboard --host=0.0.0.0 --logdir=$(project.root_path)
  # Example of user-defined targets:
  train:
    preset: gpu-small
    life_span: 0 # disable --life-span
    cmd:
      parameters:
        # parameter name must match the '{previous_version}' placeholder in the command below
        previous_version: {type: string}
        current_version: {type: string, default: v0}
      command: |
        python $(project.root_path)/src/train.py \
          --data-root $(project.shared_path)/coco2017 \
          --previous-version {previous_version} \
          --current-version {current_version} \
          --max-epochs 100
  # example of integration with MLFlow (not --backend=neuro, but as a wrapper of MLProject)
  train-with-mlflow:
    cmd:
      type: mlflow # Will look at `./MLProject` and try to find there entrypoint named "train-with-mlflow"
      # see example: https://github.com/mlflow/mlflow/blob/master/examples/pytorch/MLproject
  # example of building custom image:
  deploy:
    image:
      # 'deploy' is a sibling of 'main' under 'images', so it is addressed as $(images.deploy.name)
      name: $(images.deploy.name)
      dockerfile: seldon-custom.Dockerfile
    preset: cpu-small
    http: 9000
    http_auth: false
    volumes: # to add extra volumes, you need to re-write defaults:
      - $(volumes.project_root)
      - $(volumes.shared_data)
      - model_location:
          uri: storage:public/$(project.name)/models
          path: $(project.shared_path)/models
          mode: ro
    cmd:
      command: seldon-core-microservice seldon_model REST
    restart_policy: Always
## Usage
# alias nn=neuro-extras
# nn run train
# nn run train --preset=gpu-large
# nn run train --preset=gpu-large -P previous_version=v0 -P current_version=v1 # (override job's command parameters)
# nn run train --postfix=new-experiment # runs the job and uses postfix to avoid job name conflict
# nn run train --replicas=3 # runs 3 similar jobs with no name conflict (with postfixes: 1, 2, 3)
# nn kill train
# nn logs train
# nn exec train ls -l
# nn connect train # same as `neuro exec {train-job-name} bash`
# nn ps # lists jobs in current project only
# nn ps --hypertrain # same as `neuro ps --tag target:hypertrain`
# nn kill $(nn ps) # kill all jobs in current project
## Debugging targets:
# nn display run train # prints commands that it would run with `neuro-extras run train`
# nn display run train --http=8080 # same with arguments
## Storage targets:
# neuro-extras sync-up root # same as: `neuro cp -r -u $(project.paths.root.local) $(project.paths.root.job)`
# neuro-extras sync-down shared # same as: `neuro cp -r -u $(project.paths.shared.job) $(project.paths.shared.local)`
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment