With the next MLflow release, we'd like to update the Python RunData
interface to better support querying metrics, params, and tags. In particular, the current RunData interface, which exposes flat lists of Metric, Param, and RunTag instances, falls short in that it:
- a) requires an O(n) scan to find a metric/param/tag by key, and
- b) forces users to then make an additional field access on a Metric/Param/RunTag instance to get at the actual metric/param/tag value.
We can address point a) by migrating the metrics, params, and tags fields of RunData to dictionaries whose keys are metric, param, and tag keys. This is viable because RunData contains at most one metric, param, and tag per key - for metrics, RunData is expected to contain the maximum metric value at the maximum timestamp for a given key.
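As a sketch of the difference (using a hypothetical stand-in class, not the actual MLflow entities), the dict-based fields turn the O(n) scan plus field access into a single O(1) lookup:

```python
# Hypothetical stand-in for the Metric entity (illustrative only).
class Metric:
    def __init__(self, key, value, timestamp):
        self.key, self.value, self.timestamp = key, value, timestamp

metrics_list = [Metric("loss", 0.12, 1000), Metric("acc", 0.97, 1000)]

# Current interface: O(n) scan over a flat list, then a field access.
loss = next(m for m in metrics_list if m.key == "loss").value

# Proposed interface: dict mapping metric key -> scalar value, O(1) lookup.
metrics_dict = {m.key: m.value for m in metrics_list}
loss_new = metrics_dict["loss"]

assert loss == loss_new == 0.12
```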
Given this dict-based approach, we must also choose a value type for the dictionaries - two reasonable options are:
- Option 1 (proposed): use simple scalar values - strings for params and tags, and floats for metrics.
- Option 2 (alternative): use Python entity classes (Metric, Param, RunTag).
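The two options can be contrasted with a small sketch (again using hypothetical stand-in classes rather than the actual MLflow entities):

```python
# Hypothetical stand-in for the Param entity (illustrative only).
class Param:
    def __init__(self, key, value):
        self.key, self.value = key, value

params = [Param("lr", "0.01"), Param("epochs", "10")]

# Option 1: dict of scalars - value access is direct.
params_scalar = {p.key: p.value for p in params}
lr1 = params_scalar["lr"]

# Option 2: dict of entity objects - reading a value needs an extra
# field access, but any non-value fields added later stay reachable.
params_entity = {p.key: p for p in params}
lr2 = params_entity["lr"].value

assert lr1 == lr2 == "0.01"
```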
Option 1) makes it easier to access scalar metric/param/tag values, but harder to access non-value fields of metrics, params, and tags (e.g. the timestamp & eventually x-coordinate of metrics). Note additionally that option 1) doesn't preclude exposing full Metric/Param/RunTag objects via the RunData interface in the future (i.e. we could introduce new fields in RunData that expose such information). For example, we could handle the addition of new fields to params or tags by introducing lower-level metric_objs, param_objs, and tag_objs fields to RunData that expose flat lists of metrics/params/tags (as the current API does) - however, we expect changes to params & tags to be unlikely. In brief, option 1) trades off flexibility of the RunData interface in exchange for user-friendliness, a compromise that seems worthwhile given the unlikeliness of API changes to Param or RunTag.
A small detail: with option 1) and the current set of fluent/client APIs, it won't be possible to access metric timestamps in Python. We can address this by exposing the AbstractStore's get_metric_history API via the Python client API, e.g. via a get_metric_history method on MlflowClient.
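A toy in-memory sketch of what such a client-side method might look like (all names here are assumptions, not the final API): get_metric_history returns every logged (value, timestamp) pair for a key, recovering the timestamps that the scalar metrics dict drops.

```python
# Toy in-memory client sketching the proposed get_metric_history shape.
class ToyClient:
    def __init__(self):
        # (run_id, metric_key) -> list of (value, timestamp) pairs
        self._history = {}

    def log_metric(self, run_id, key, value, timestamp):
        self._history.setdefault((run_id, key), []).append((value, timestamp))

    def get_metric_history(self, run_id, key):
        # Full history for a key, not just the single latest/maximum value.
        return list(self._history.get((run_id, key), []))

client = ToyClient()
client.log_metric("run1", "loss", 0.5, 1000)
client.log_metric("run1", "loss", 0.3, 2000)

history = client.get_metric_history("run1", "loss")
assert history == [(0.5, 1000), (0.3, 2000)]
```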
Option 2)'s strengths are option 1)'s weaknesses - 2) allows us to add new fields to metrics/params/tags (e.g. metric x-coordinates) and access them via RunData without adding new fields to RunData, at the cost of requiring an extra field access to read metric/param/tag values.
Any feedback on the proposed APIs is much appreciated :) - see the gists below for an example of how user workflows might look with the new APIs. It'd be particularly helpful to hear if there are any use cases that would be helped/hurt by the APIs proposed above. Note also that we expect many query use cases to be addressed by the search_runs API, which allows for filtering & searching runs by metric/param/tag. We'd also eventually like to add support for getting a handle to all logged run data as a Pandas or Spark DataFrame, which we expect will also simplify query use cases as well as data export from a server.
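As a rough sketch of the DataFrame-export idea (pure-Python rows standing in for a real Pandas/Spark DataFrame; the per-run data and column naming here are assumptions for illustration):

```python
# Hypothetical per-run data under the proposed dict-based interface.
runs = {
    "run1": {"metrics": {"loss": 0.12}, "params": {"lr": "0.01"}},
    "run2": {"metrics": {"loss": 0.08}, "params": {"lr": "0.001"}},
}

# Flatten runs into tabular rows - roughly the shape a runs-as-DataFrame
# API might return (e.g. these rows could feed pandas.DataFrame(rows)).
rows = [
    {
        "run_id": run_id,
        **data["metrics"],
        **{f"param.{k}": v for k, v in data["params"].items()},
    }
    for run_id, data in runs.items()
]

assert rows[0] == {"run_id": "run1", "loss": 0.12, "param.lr": "0.01"}
```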