MLflow project backend execution flows
### Two different flows
"""
Flow 1: ensure there's a run accessible to the local machine prior to running
the project.
Pros:
1. User guaranteed that they can access the run created for running the project from the machine that triggered
project execution.
Cons:
1. that we have to expose this wide validate_project API, which may result in duplicate logic to fetch
projects for validation
2. that it doesn't really make sense to expose a tracking_uri argument to the backend, since
the fluent project API creates it beforehand. however, maybe eventually the fluent mlflow.run
API will remove its run_id argument, and we can then pass tracking_uri to it. Note that this limits
the backend's ability to specify custom run creation behavior based on the tracking URI.
"""
def run(experiment_id, uri, entry_point=None, parameters=None,
        version=None, backend="local", backend_config=None, run_id=None,
        use_conda=True, storage_dir=None, synchronous=True):
    # Pseudocode for the mlflow.projects.run() implementation, omitting some details like
    # handling of the use_conda & storage_dir arguments etc.
    backend_obj = get_backend(backend)
    # Perform arbitrary pre-execution validation here, e.g. verifying the project's
    # execution environment & the user-supplied backend_config
    backend_obj.validate_project(experiment_id, uri, entry_point, parameters, version,
                                 backend, backend_config, run_id)
    # Get the run ID to use for project execution
    if backend == "local" and run_id:
        final_run_id = run_id  # only the local backend can resume runs
    else:
        final_run_id = create_run().info.run_id
    # Trigger backend run execution
    submitted_run = backend_obj.run(experiment_id, uri, entry_point, parameters,
                                    version, backend_config, final_run_id)
    if synchronous:
        # Wait on the submitted run, etc.
        submitted_run.wait()
    return submitted_run
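
# For illustration, a minimal sketch of the backend interface that Flow 1 implies. The
# AbstractBackend name is the one referenced in Flow 2's notes below, but these method
# signatures are assumptions made for this sketch, not the actual MLflow plugin API.
from abc import ABC, abstractmethod

class AbstractBackend(ABC):
    @abstractmethod
    def validate_project(self, experiment_id, uri, entry_point, parameters,
                         version, backend, backend_config, run_id):
        """Perform arbitrary pre-execution validation (e.g. check backend_config and
        that the project at `uri` can be fetched); raise an exception on failure."""

    @abstractmethod
    def run(self, experiment_id, uri, entry_point, parameters,
            version, backend_config, run_id):
        """Execute the project against the run ID created by mlflow.projects.run()
        and return a SubmittedRun handle that exposes wait()."""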
"""
Flow 2: let backend create its own run.
Pros:
1. Full flexibility over backend logic, backend can manage run creation however it likes
Cons:
1. Plugin implementors responsible for ensuring that it's possible to access the MLflow run
created for the project execution from the local client. We should document this expectation
in AbstractBackend.run() docstring
"""
def run(experiment_id, uri, entry_point=None, parameters=None,
        version=None, backend="local", backend_config=None, run_id=None,
        use_conda=True, storage_dir=None, synchronous=True):
    # Pseudocode for the mlflow.projects.run() implementation, omitting some details like
    # handling of the use_conda & storage_dir arguments etc.
    backend_obj = get_backend(backend)
    # Set a special backend config key for the local backend to support resuming runs from
    # a specific run ID. This is internal functionality specific to the local project backend.
    if backend == "local" and run_id:
        # Copy so we don't mutate the caller's dict, and handle backend_config=None
        backend_config = dict(backend_config or {})
        backend_config["_local_backend_run_id"] = run_id
    # Trigger backend run execution. The backend can handle run creation/validation however
    # it chooses, additionally with behavior conditional on the tracking URI if desired.
    submitted_run = backend_obj.run(experiment_id, uri, entry_point, parameters,
                                    version, backend_config,
                                    tracking_uri=mlflow.get_tracking_uri())
    if synchronous:
        # Wait on the submitted run, etc.
        submitted_run.wait()
    return submitted_run
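
# For comparison, a minimal sketch of how AbstractBackend.run() might look under Flow 2,
# folding the expectation from the cons above into its docstring. The signature shown here
# is an assumption for illustration, not the actual MLflow plugin API.
from abc import ABC, abstractmethod

class AbstractBackend(ABC):
    @abstractmethod
    def run(self, experiment_id, uri, entry_point, parameters,
            version, backend_config, tracking_uri):
        """Create (or resume) an MLflow run and execute the project against it.

        Implementors are responsible for creating the run against `tracking_uri`
        such that it is accessible from the client that triggered project execution,
        and for returning a SubmittedRun handle that exposes wait().
        """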