Created March 28, 2024 18:59
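This gist is the compiled Kubeflow Pipelines (KFP v2) spec for Google Cloud's AutoML Tabular pipeline. As a usage sketch (not part of the original file), a compiled spec like this is typically submitted to Vertex AI Pipelines with the google-cloud-aiplatform SDK; the project, bucket, and parameter values below are placeholders, and the parameter names are taken from the Inputs block at the top of the YAML.

# Sketch only: submit the compiled spec below as a Vertex AI pipeline run.
# All project IDs, bucket paths, and parameter values are placeholders.
from google.cloud import aiplatform

aiplatform.init(
    project="my-gcp-project",            # placeholder project ID
    location="us-central1",
    staging_bucket="gs://my-bucket",     # placeholder staging bucket
)

job = aiplatform.PipelineJob(
    display_name="automl-tabular",
    template_path="automl-tabular.yaml",           # this compiled spec
    pipeline_root="gs://my-bucket/pipeline_root",
    parameter_values={
        # Inputs declared in the YAML header; values are examples only.
        "project": "my-gcp-project",
        "location": "us-central1",
        "root_dir": "gs://my-bucket/pipeline_root",
        "target_column": "label",
        "prediction_type": "classification",
        "optimization_objective": "maximize-au-roc",
        "transformations": "gs://my-bucket/transformations.json",
        "train_budget_milli_node_hours": 1000.0,
        "data_source_csv_filenames": "gs://my-bucket/train.csv",
        "run_evaluation": True,
    },
)
job.run()  # blocks until the run finishes; job.submit() returns immediately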
automl-tabular.yaml
# PIPELINE DEFINITION | |
# Name: automl-tabular | |
# Description: The AutoML Tabular pipeline v1. | |
# Inputs: | |
# additional_experiments: dict | |
# cv_trainer_worker_pool_specs_override: list | |
# data_source_bigquery_table_path: str [Default: ''] | |
# data_source_csv_filenames: str [Default: ''] | |
# dataflow_service_account: str [Default: ''] | |
# dataflow_subnetwork: str [Default: ''] | |
# dataflow_use_public_ips: bool [Default: True] | |
# disable_early_stopping: bool [Default: False] | |
# distill_batch_predict_machine_type: str [Default: 'n1-standard-16'] | |
# distill_batch_predict_max_replica_count: int [Default: 25.0] | |
# distill_batch_predict_starting_replica_count: int [Default: 25.0] | |
# enable_probabilistic_inference: bool [Default: False] | |
# encryption_spec_key_name: str [Default: ''] | |
# evaluation_batch_explain_machine_type: str [Default: 'n1-highmem-8'] | |
# evaluation_batch_explain_max_replica_count: int [Default: 10.0] | |
# evaluation_batch_explain_starting_replica_count: int [Default: 10.0] | |
# evaluation_batch_predict_machine_type: str [Default: 'n1-highmem-8'] | |
# evaluation_batch_predict_max_replica_count: int [Default: 20.0] | |
# evaluation_batch_predict_starting_replica_count: int [Default: 20.0] | |
# evaluation_dataflow_disk_size_gb: int [Default: 50.0] | |
# evaluation_dataflow_machine_type: str [Default: 'n1-standard-4'] | |
# evaluation_dataflow_max_num_workers: int [Default: 100.0] | |
# evaluation_dataflow_starting_num_workers: int [Default: 10.0] | |
# export_additional_model_without_custom_ops: bool [Default: False] | |
# fast_testing: bool [Default: False] | |
# location: str | |
# model_description: str [Default: ''] | |
# model_display_name: str [Default: ''] | |
# optimization_objective: str | |
# optimization_objective_precision_value: float [Default: -1.0] | |
# optimization_objective_recall_value: float [Default: -1.0] | |
# parent_model: system.Artifact | |
# predefined_split_key: str [Default: ''] | |
# prediction_type: str | |
# project: str | |
# quantiles: list | |
# root_dir: str | |
# run_distillation: bool [Default: False] | |
# run_evaluation: bool [Default: False] | |
# stage_1_num_parallel_trials: int [Default: 35.0] | |
# stage_1_tuner_worker_pool_specs_override: list | |
# stage_1_tuning_result_artifact_uri: str [Default: ''] | |
# stage_2_num_parallel_trials: int [Default: 35.0] | |
# stage_2_num_selected_trials: int [Default: 5.0] | |
# stats_and_example_gen_dataflow_disk_size_gb: int [Default: 40.0] | |
# stats_and_example_gen_dataflow_machine_type: str [Default: 'n1-standard-16'] | |
# stats_and_example_gen_dataflow_max_num_workers: int [Default: 25.0] | |
# stratified_split_key: str [Default: ''] | |
# study_spec_parameters_override: list | |
# target_column: str | |
# test_fraction: float [Default: -1.0] | |
# timestamp_split_key: str [Default: ''] | |
# train_budget_milli_node_hours: float | |
# training_fraction: float [Default: -1.0] | |
# transform_dataflow_disk_size_gb: int [Default: 40.0] | |
# transform_dataflow_machine_type: str [Default: 'n1-standard-16'] | |
# transform_dataflow_max_num_workers: int [Default: 25.0] | |
# transformations: str | |
# validation_fraction: float [Default: -1.0] | |
# vertex_dataset: system.Artifact | |
# weight_column: str [Default: ''] | |
# Outputs: | |
# feature-attribution-2-feature_attributions: system.Metrics | |
# feature-attribution-3-feature_attributions: system.Metrics | |
# feature-attribution-feature_attributions: system.Metrics | |
# model-evaluation-2-evaluation_metrics: system.Metrics | |
# model-evaluation-3-evaluation_metrics: system.Metrics | |
# model-evaluation-evaluation_metrics: system.Metrics | |
components: | |
comp-automl-tabular-cv-trainer: | |
executorLabel: exec-automl-tabular-cv-trainer | |
inputDefinitions: | |
artifacts: | |
materialized_cv_splits: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The materialized cross-validation splits. | |
metadata: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The tabular example gen metadata. | |
transform_output: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The transform output artifact. | |
tuning_result_input: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: AutoML Tabular tuning result. | |
parameters: | |
deadline_hours: | |
description: Number of hours the cross-validation trainer should run. | |
parameterType: NUMBER_DOUBLE | |
encryption_spec_key_name: | |
defaultValue: '' | |
description: Customer-managed encryption key. | |
isOptional: true | |
parameterType: STRING | |
location: | |
description: Location for running the Cross-validation trainer. | |
parameterType: STRING | |
num_parallel_trials: | |
description: Number of parallel training trials. | |
parameterType: NUMBER_INTEGER | |
num_selected_features: | |
defaultValue: 0.0 | |
description: 'Number of selected features. The number of | |
features to learn in the NN models.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
num_selected_trials: | |
description: 'Number of selected trials. The number of weak | |
learners in the final model is 5 * num_selected_trials.' | |
parameterType: NUMBER_INTEGER | |
project: | |
description: Project to run Cross-validation trainer. | |
parameterType: STRING | |
root_dir: | |
description: The Cloud Storage location to store the output. | |
parameterType: STRING | |
single_run_max_secs: | |
description: Max number of seconds each training trial runs. | |
parameterType: NUMBER_INTEGER | |
worker_pool_specs_override_json: | |
defaultValue: [] | |
description: 'JSON worker pool specs. E.g., | |
[{"machine_spec": {"machine_type": | |
"n1-standard-16"}},{},{},{"machine_spec": {"machine_type": | |
"n1-standard-16"}}]' | |
isOptional: true | |
parameterType: LIST | |
outputDefinitions: | |
artifacts: | |
tuning_result_output: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The trained model and architectures. | |
parameters: | |
execution_metrics: | |
description: Core metrics in dictionary of component execution. | |
parameterType: STRUCT | |
gcp_resources: | |
description: 'GCP resources created by this component. For more details, | |
see | |
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' | |
parameterType: STRING | |
comp-automl-tabular-cv-trainer-2: | |
executorLabel: exec-automl-tabular-cv-trainer-2 | |
inputDefinitions: | |
artifacts: | |
materialized_cv_splits: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The materialized cross-validation splits. | |
metadata: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The tabular example gen metadata. | |
transform_output: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The transform output artifact. | |
tuning_result_input: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: AutoML Tabular tuning result. | |
parameters: | |
deadline_hours: | |
description: Number of hours the cross-validation trainer should run. | |
parameterType: NUMBER_DOUBLE | |
encryption_spec_key_name: | |
defaultValue: '' | |
description: Customer-managed encryption key. | |
isOptional: true | |
parameterType: STRING | |
location: | |
description: Location for running the Cross-validation trainer. | |
parameterType: STRING | |
num_parallel_trials: | |
description: Number of parallel training trials. | |
parameterType: NUMBER_INTEGER | |
num_selected_features: | |
defaultValue: 0.0 | |
description: 'Number of selected features. The number of | |
features to learn in the NN models.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
num_selected_trials: | |
description: 'Number of selected trials. The number of weak | |
learners in the final model is 5 * num_selected_trials.' | |
parameterType: NUMBER_INTEGER | |
project: | |
description: Project to run Cross-validation trainer. | |
parameterType: STRING | |
root_dir: | |
description: The Cloud Storage location to store the output. | |
parameterType: STRING | |
single_run_max_secs: | |
description: Max number of seconds each training trial runs. | |
parameterType: NUMBER_INTEGER | |
worker_pool_specs_override_json: | |
defaultValue: [] | |
description: 'JSON worker pool specs. E.g., | |
[{"machine_spec": {"machine_type": | |
"n1-standard-16"}},{},{},{"machine_spec": {"machine_type": | |
"n1-standard-16"}}]' | |
isOptional: true | |
parameterType: LIST | |
outputDefinitions: | |
artifacts: | |
tuning_result_output: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The trained model and architectures. | |
parameters: | |
execution_metrics: | |
description: Core metrics in dictionary of component execution. | |
parameterType: STRUCT | |
gcp_resources: | |
description: 'GCP resources created by this component. For more details, | |
see | |
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' | |
parameterType: STRING | |
comp-automl-tabular-ensemble: | |
executorLabel: exec-automl-tabular-ensemble | |
inputDefinitions: | |
artifacts: | |
dataset_schema: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The schema of the dataset. | |
instance_baseline: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: 'The instance baseline | |
used to calculate explanations.' | |
metadata: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The tabular example gen metadata. | |
transform_output: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The transform output artifact. | |
tuning_result_input: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: 'AutoML Tabular tuning | |
result.' | |
warmup_data: | |
artifactType: | |
schemaTitle: system.Dataset | |
schemaVersion: 0.0.1 | |
description: 'The warm up data. Ensemble component will save the | |
warm up data together with the model artifact, used to warm up the model | |
when prediction server starts.' | |
isOptional: true | |
parameters: | |
encryption_spec_key_name: | |
defaultValue: '' | |
description: Customer-managed encryption key. | |
isOptional: true | |
parameterType: STRING | |
export_additional_model_without_custom_ops: | |
defaultValue: false | |
description: 'If true, export an additional model without custom TF operators
to the `model_without_custom_ops` output.'
isOptional: true | |
parameterType: BOOLEAN | |
location: | |
description: Location for running the Cross-validation trainer. | |
parameterType: STRING | |
project: | |
description: Project to run Cross-validation trainer. | |
parameterType: STRING | |
root_dir: | |
description: The Cloud Storage location to store the output. | |
parameterType: STRING | |
outputDefinitions: | |
artifacts: | |
explanation_metadata_artifact: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
model: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The output model. | |
model_architecture: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The architecture of the output model. | |
model_without_custom_ops: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The output model without custom TF operators, this output will | |
be empty unless `export_additional_model_without_custom_ops` is set. | |
unmanaged_container_model: | |
artifactType: | |
schemaTitle: google.UnmanagedContainerModel | |
schemaVersion: 0.0.1 | |
parameters: | |
explanation_metadata: | |
description: The explanation metadata used by Vertex online and batch
explanations.
parameterType: STRUCT | |
explanation_parameters: | |
parameterType: STRUCT | |
gcp_resources: | |
description: 'GCP resources created by this component. For more details, | |
see | |
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' | |
parameterType: STRING | |
comp-automl-tabular-ensemble-2: | |
executorLabel: exec-automl-tabular-ensemble-2 | |
inputDefinitions: | |
artifacts: | |
dataset_schema: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The schema of the dataset. | |
instance_baseline: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: 'The instance baseline | |
used to calculate explanations.' | |
metadata: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The tabular example gen metadata. | |
transform_output: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The transform output artifact. | |
tuning_result_input: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: 'AutoML Tabular tuning | |
result.' | |
warmup_data: | |
artifactType: | |
schemaTitle: system.Dataset | |
schemaVersion: 0.0.1 | |
description: 'The warm up data. Ensemble component will save the | |
warm up data together with the model artifact, used to warm up the model | |
when prediction server starts.' | |
isOptional: true | |
parameters: | |
encryption_spec_key_name: | |
defaultValue: '' | |
description: Customer-managed encryption key. | |
isOptional: true | |
parameterType: STRING | |
export_additional_model_without_custom_ops: | |
defaultValue: false | |
description: 'If true, export an additional model without custom TF operators
to the `model_without_custom_ops` output.'
isOptional: true | |
parameterType: BOOLEAN | |
location: | |
description: Location for running the Cross-validation trainer. | |
parameterType: STRING | |
project: | |
description: Project to run Cross-validation trainer. | |
parameterType: STRING | |
root_dir: | |
description: The Cloud Storage location to store the output. | |
parameterType: STRING | |
outputDefinitions: | |
artifacts: | |
explanation_metadata_artifact: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
model: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The output model. | |
model_architecture: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The architecture of the output model. | |
model_without_custom_ops: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The output model without custom TF operators, this output will | |
be empty unless `export_additional_model_without_custom_ops` is set. | |
unmanaged_container_model: | |
artifactType: | |
schemaTitle: google.UnmanagedContainerModel | |
schemaVersion: 0.0.1 | |
parameters: | |
explanation_metadata: | |
description: The explanation metadata used by Vertex online and batch
explanations.
parameterType: STRUCT | |
explanation_parameters: | |
parameterType: STRUCT | |
gcp_resources: | |
description: 'GCP resources created by this component. For more details, | |
see | |
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' | |
parameterType: STRING | |
comp-automl-tabular-ensemble-3: | |
executorLabel: exec-automl-tabular-ensemble-3 | |
inputDefinitions: | |
artifacts: | |
dataset_schema: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The schema of the dataset. | |
instance_baseline: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: 'The instance baseline | |
used to calculate explanations.' | |
metadata: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The tabular example gen metadata. | |
transform_output: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The transform output artifact. | |
tuning_result_input: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: 'AutoML Tabular tuning | |
result.' | |
warmup_data: | |
artifactType: | |
schemaTitle: system.Dataset | |
schemaVersion: 0.0.1 | |
description: 'The warm up data. Ensemble component will save the | |
warm up data together with the model artifact, used to warm up the model | |
when prediction server starts.' | |
isOptional: true | |
parameters: | |
encryption_spec_key_name: | |
defaultValue: '' | |
description: Customer-managed encryption key. | |
isOptional: true | |
parameterType: STRING | |
export_additional_model_without_custom_ops: | |
defaultValue: false | |
description: 'If true, export an additional model without custom TF operators
to the `model_without_custom_ops` output.'
isOptional: true | |
parameterType: BOOLEAN | |
location: | |
description: Location for running the Cross-validation trainer. | |
parameterType: STRING | |
project: | |
description: Project to run Cross-validation trainer. | |
parameterType: STRING | |
root_dir: | |
description: The Cloud Storage location to store the output. | |
parameterType: STRING | |
outputDefinitions: | |
artifacts: | |
explanation_metadata_artifact: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
model: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The output model. | |
model_architecture: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The architecture of the output model. | |
model_without_custom_ops: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The output model without custom TF operators, this output will | |
be empty unless `export_additional_model_without_custom_ops` is set. | |
unmanaged_container_model: | |
artifactType: | |
schemaTitle: google.UnmanagedContainerModel | |
schemaVersion: 0.0.1 | |
parameters: | |
explanation_metadata: | |
description: The explanation metadata used by Vertex online and batch
explanations.
parameterType: STRUCT | |
explanation_parameters: | |
parameterType: STRUCT | |
gcp_resources: | |
description: 'GCP resources created by this component. For more details, | |
see | |
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' | |
parameterType: STRING | |
comp-automl-tabular-finalizer: | |
executorLabel: exec-automl-tabular-finalizer | |
inputDefinitions: | |
parameters: | |
encryption_spec_key_name: | |
defaultValue: '' | |
description: Customer-managed encryption key. | |
isOptional: true | |
parameterType: STRING | |
location: | |
description: Location for running the Cross-validation trainer. | |
parameterType: STRING | |
project: | |
description: Project to run Cross-validation trainer. | |
parameterType: STRING | |
root_dir: | |
description: The Cloud Storage location to store the output. | |
parameterType: STRING | |
outputDefinitions: | |
parameters: | |
gcp_resources: | |
description: 'GCP resources created by this component. For more details, | |
see | |
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' | |
parameterType: STRING | |
comp-automl-tabular-infra-validator: | |
executorLabel: exec-automl-tabular-infra-validator | |
inputDefinitions: | |
artifacts: | |
unmanaged_container_model: | |
artifactType: | |
schemaTitle: google.UnmanagedContainerModel | |
schemaVersion: 0.0.1 | |
description: 'google.UnmanagedContainerModel for model | |
to be validated.' | |
comp-automl-tabular-infra-validator-2: | |
executorLabel: exec-automl-tabular-infra-validator-2 | |
inputDefinitions: | |
artifacts: | |
unmanaged_container_model: | |
artifactType: | |
schemaTitle: google.UnmanagedContainerModel | |
schemaVersion: 0.0.1 | |
description: 'google.UnmanagedContainerModel for model | |
to be validated.' | |
comp-automl-tabular-infra-validator-3: | |
executorLabel: exec-automl-tabular-infra-validator-3 | |
inputDefinitions: | |
artifacts: | |
unmanaged_container_model: | |
artifactType: | |
schemaTitle: google.UnmanagedContainerModel | |
schemaVersion: 0.0.1 | |
description: 'google.UnmanagedContainerModel for model | |
to be validated.' | |
comp-automl-tabular-stage-1-tuner: | |
executorLabel: exec-automl-tabular-stage-1-tuner | |
inputDefinitions: | |
artifacts: | |
feature_ranking: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
isOptional: true | |
materialized_eval_split: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The materialized eval split. | |
materialized_train_split: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: 'The materialized train | |
split.' | |
metadata: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The tabular example gen metadata. | |
transform_output: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The transform output artifact. | |
parameters: | |
deadline_hours: | |
description: 'Number of hours the cross-validation trainer | |
should run.' | |
parameterType: NUMBER_DOUBLE | |
disable_early_stopping: | |
defaultValue: false | |
description: 'If true, disable early stopping. Defaults to false.'
isOptional: true | |
parameterType: BOOLEAN | |
encryption_spec_key_name: | |
defaultValue: '' | |
description: Customer-managed encryption key. | |
isOptional: true | |
parameterType: STRING | |
location: | |
description: Location for running the Cross-validation trainer. | |
parameterType: STRING | |
num_parallel_trials: | |
description: Number of parallel training trials. | |
parameterType: NUMBER_INTEGER | |
num_selected_features: | |
defaultValue: 0.0 | |
description: 'Number of selected features. The number of | |
features to learn in the NN models.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
num_selected_trials: | |
description: 'Number of selected trials. The number of weak | |
learners in the final model is 5 * num_selected_trials.' | |
parameterType: NUMBER_INTEGER | |
project: | |
description: Project to run Cross-validation trainer. | |
parameterType: STRING | |
reduce_search_space_mode: | |
defaultValue: regular | |
description: 'The reduce search space mode. Possible | |
values: "regular" (default), "minimal", "full".' | |
isOptional: true | |
parameterType: STRING | |
root_dir: | |
description: The Cloud Storage location to store the output. | |
parameterType: STRING | |
run_distillation: | |
defaultValue: false | |
description: 'If true, run in distillation mode. Defaults to false.'
isOptional: true | |
parameterType: BOOLEAN | |
single_run_max_secs: | |
description: Max number of seconds each training trial runs. | |
parameterType: NUMBER_INTEGER | |
study_spec_parameters_override: | |
defaultValue: [] | |
description: 'JSON study spec. E.g., | |
[{"parameter_id": "model_type","categorical_value_spec": {"values": | |
["nn"]}}]' | |
isOptional: true | |
parameterType: LIST | |
tune_feature_selection_rate: | |
defaultValue: false | |
isOptional: true | |
parameterType: BOOLEAN | |
worker_pool_specs_override_json: | |
defaultValue: [] | |
description: 'JSON worker pool specs. E.g., | |
[{"machine_spec": {"machine_type": | |
"n1-standard-16"}},{},{},{"machine_spec": {"machine_type": | |
"n1-standard-16"}}]' | |
isOptional: true | |
parameterType: LIST | |
outputDefinitions: | |
artifacts: | |
tuning_result_output: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The trained model and architectures. | |
parameters: | |
execution_metrics: | |
description: Core metrics in dictionary of component execution. | |
parameterType: STRUCT | |
gcp_resources: | |
description: 'GCP resources created by this component. For more details, | |
see | |
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' | |
parameterType: STRING | |
comp-automl-tabular-stage-1-tuner-2: | |
executorLabel: exec-automl-tabular-stage-1-tuner-2 | |
inputDefinitions: | |
artifacts: | |
feature_ranking: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
isOptional: true | |
materialized_eval_split: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The materialized eval split. | |
materialized_train_split: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: 'The materialized train | |
split.' | |
metadata: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The tabular example gen metadata. | |
transform_output: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The transform output artifact. | |
parameters: | |
deadline_hours: | |
description: 'Number of hours the cross-validation trainer | |
should run.' | |
parameterType: NUMBER_DOUBLE | |
disable_early_stopping: | |
defaultValue: false | |
description: 'If true, disable early stopping. Defaults to false.'
isOptional: true | |
parameterType: BOOLEAN | |
encryption_spec_key_name: | |
defaultValue: '' | |
description: Customer-managed encryption key. | |
isOptional: true | |
parameterType: STRING | |
location: | |
description: Location for running the Cross-validation trainer. | |
parameterType: STRING | |
num_parallel_trials: | |
description: Number of parallel training trials. | |
parameterType: NUMBER_INTEGER | |
num_selected_features: | |
defaultValue: 0.0 | |
description: 'Number of selected features. The number of | |
features to learn in the NN models.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
num_selected_trials: | |
description: 'Number of selected trials. The number of weak | |
learners in the final model is 5 * num_selected_trials.' | |
parameterType: NUMBER_INTEGER | |
project: | |
description: Project to run Cross-validation trainer. | |
parameterType: STRING | |
reduce_search_space_mode: | |
defaultValue: regular | |
description: 'The reduce search space mode. Possible | |
values: "regular" (default), "minimal", "full".' | |
isOptional: true | |
parameterType: STRING | |
root_dir: | |
description: The Cloud Storage location to store the output. | |
parameterType: STRING | |
run_distillation: | |
defaultValue: false | |
description: 'If true, run in distillation mode. Defaults to false.'
isOptional: true | |
parameterType: BOOLEAN | |
single_run_max_secs: | |
description: Max number of seconds each training trial runs. | |
parameterType: NUMBER_INTEGER | |
study_spec_parameters_override: | |
defaultValue: [] | |
description: 'JSON study spec. E.g., | |
[{"parameter_id": "model_type","categorical_value_spec": {"values": | |
["nn"]}}]' | |
isOptional: true | |
parameterType: LIST | |
tune_feature_selection_rate: | |
defaultValue: false | |
isOptional: true | |
parameterType: BOOLEAN | |
worker_pool_specs_override_json: | |
defaultValue: [] | |
description: 'JSON worker pool specs. E.g., | |
[{"machine_spec": {"machine_type": | |
"n1-standard-16"}},{},{},{"machine_spec": {"machine_type": | |
"n1-standard-16"}}]' | |
isOptional: true | |
parameterType: LIST | |
outputDefinitions: | |
artifacts: | |
tuning_result_output: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The trained model and architectures. | |
parameters: | |
execution_metrics: | |
description: Core metrics in dictionary of component execution. | |
parameterType: STRUCT | |
gcp_resources: | |
description: 'GCP resources created by this component. For more details, | |
see | |
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' | |
parameterType: STRING | |
comp-automl-tabular-transform: | |
executorLabel: exec-automl-tabular-transform | |
inputDefinitions: | |
artifacts: | |
dataset_schema: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The schema of the dataset. | |
eval_split: | |
artifactType: | |
schemaTitle: system.Dataset | |
schemaVersion: 0.0.1 | |
description: The eval split. | |
metadata: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The tabular example gen metadata. | |
test_split: | |
artifactType: | |
schemaTitle: system.Dataset | |
schemaVersion: 0.0.1 | |
description: The test split. | |
train_split: | |
artifactType: | |
schemaTitle: system.Dataset | |
schemaVersion: 0.0.1 | |
description: The train split. | |
parameters: | |
dataflow_disk_size_gb: | |
defaultValue: 40.0 | |
description: 'The disk size, in gigabytes, to use | |
on each Dataflow worker instance. If not set, default to 40.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
dataflow_machine_type: | |
defaultValue: n1-standard-16 | |
description: 'The machine type used for dataflow | |
jobs. If not set, default to n1-standard-16.' | |
isOptional: true | |
parameterType: STRING | |
dataflow_max_num_workers: | |
defaultValue: 25.0 | |
description: 'The number of workers to run the | |
dataflow job. If not set, default to 25.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
dataflow_service_account: | |
defaultValue: '' | |
description: 'Custom service account to run | |
dataflow jobs.' | |
isOptional: true | |
parameterType: STRING | |
dataflow_subnetwork: | |
defaultValue: '' | |
description: 'Dataflow''s fully qualified subnetwork | |
name, when empty the default subnetwork will be used. More | |
details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' | |
isOptional: true | |
parameterType: STRING | |
dataflow_use_public_ips: | |
defaultValue: true | |
description: 'Specifies whether Dataflow | |
workers use public IP addresses.' | |
isOptional: true | |
parameterType: BOOLEAN | |
encryption_spec_key_name: | |
defaultValue: '' | |
description: Customer-managed encryption key. | |
isOptional: true | |
parameterType: STRING | |
location: | |
description: Location for running the Cross-validation trainer. | |
parameterType: STRING | |
project: | |
description: Project to run Cross-validation trainer. | |
parameterType: STRING | |
root_dir: | |
description: The Cloud Storage location to store the output. | |
parameterType: STRING | |
outputDefinitions: | |
artifacts: | |
materialized_eval_split: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The materialized eval split.
materialized_test_split: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
materialized_train_split: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The materialized train split. | |
training_schema_uri: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The training schema. | |
transform_output: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The transform output artifact. | |
parameters: | |
gcp_resources: | |
description: 'GCP resources created by this component. For more details, | |
see | |
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' | |
parameterType: STRING | |
comp-automl-tabular-transform-2: | |
executorLabel: exec-automl-tabular-transform-2 | |
inputDefinitions: | |
artifacts: | |
dataset_schema: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The schema of the dataset. | |
eval_split: | |
artifactType: | |
schemaTitle: system.Dataset | |
schemaVersion: 0.0.1 | |
description: The eval split. | |
metadata: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The tabular example gen metadata. | |
test_split: | |
artifactType: | |
schemaTitle: system.Dataset | |
schemaVersion: 0.0.1 | |
description: The test split. | |
train_split: | |
artifactType: | |
schemaTitle: system.Dataset | |
schemaVersion: 0.0.1 | |
description: The train split. | |
parameters: | |
dataflow_disk_size_gb: | |
defaultValue: 40.0 | |
description: 'The disk size, in gigabytes, to use | |
on each Dataflow worker instance. If not set, default to 40.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
dataflow_machine_type: | |
defaultValue: n1-standard-16 | |
description: 'The machine type used for dataflow | |
jobs. If not set, default to n1-standard-16.' | |
isOptional: true | |
parameterType: STRING | |
dataflow_max_num_workers: | |
defaultValue: 25.0 | |
description: 'The number of workers to run the | |
dataflow job. If not set, default to 25.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
dataflow_service_account: | |
defaultValue: '' | |
description: 'Custom service account to run | |
dataflow jobs.' | |
isOptional: true | |
parameterType: STRING | |
dataflow_subnetwork: | |
defaultValue: '' | |
description: 'Dataflow''s fully qualified subnetwork | |
name, when empty the default subnetwork will be used. More | |
details: https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' | |
isOptional: true | |
parameterType: STRING | |
dataflow_use_public_ips: | |
defaultValue: true | |
description: 'Specifies whether Dataflow | |
workers use public IP addresses.' | |
isOptional: true | |
parameterType: BOOLEAN | |
encryption_spec_key_name: | |
defaultValue: '' | |
description: Customer-managed encryption key. | |
isOptional: true | |
parameterType: STRING | |
location: | |
description: Location for running the Cross-validation trainer. | |
parameterType: STRING | |
project: | |
description: Project to run Cross-validation trainer. | |
parameterType: STRING | |
root_dir: | |
description: The Cloud Storage location to store the output. | |
parameterType: STRING | |
outputDefinitions: | |
artifacts: | |
materialized_eval_split: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The materialized eval split.
materialized_test_split: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
materialized_train_split: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The materialized train split. | |
training_schema_uri: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The training schema. | |
transform_output: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The transform output artifact. | |
parameters: | |
gcp_resources: | |
description: 'GCP resources created by this component. For more details, | |
see | |
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' | |
parameterType: STRING | |
comp-bool-identity: | |
executorLabel: exec-bool-identity | |
inputDefinitions: | |
parameters: | |
value: | |
description: Boolean value to return | |
parameterType: BOOLEAN | |
outputDefinitions: | |
parameters: | |
Output: | |
parameterType: STRING | |
comp-bool-identity-2: | |
executorLabel: exec-bool-identity-2 | |
inputDefinitions: | |
parameters: | |
value: | |
description: Boolean value to return | |
parameterType: BOOLEAN | |
outputDefinitions: | |
parameters: | |
Output: | |
parameterType: STRING | |
comp-bool-identity-3: | |
executorLabel: exec-bool-identity-3 | |
inputDefinitions: | |
parameters: | |
value: | |
description: Boolean value to return | |
parameterType: BOOLEAN | |
outputDefinitions: | |
parameters: | |
Output: | |
parameterType: STRING | |
comp-calculate-training-parameters: | |
executorLabel: exec-calculate-training-parameters | |
inputDefinitions: | |
parameters: | |
fast_testing: | |
defaultValue: false | |
description: Internal flag used for presubmit tests. | |
isOptional: true | |
parameterType: BOOLEAN | |
is_skip_architecture_search: | |
defaultValue: false | |
description: 'If component is being called in the | |
skip_architecture_search pipeline.' | |
isOptional: true | |
parameterType: BOOLEAN | |
run_distillation: | |
description: Whether to run distill in the training pipeline. | |
parameterType: BOOLEAN | |
stage_1_num_parallel_trials: | |
description: Number of parallel trials for stage 1.
parameterType: NUMBER_INTEGER | |
stage_2_num_parallel_trials: | |
description: Number of parallel trials for stage 2.
parameterType: NUMBER_INTEGER | |
train_budget_milli_node_hours: | |
description: 'The train budget for creating this model, expressed in milli
node hours, i.e. a value of 1,000 in this field means 1 node hour.'
parameterType: NUMBER_DOUBLE | |
outputDefinitions: | |
parameters: | |
distill_stage_1_deadline_hours: | |
parameterType: NUMBER_DOUBLE | |
reduce_search_space_mode: | |
parameterType: STRING | |
stage_1_deadline_hours: | |
parameterType: NUMBER_DOUBLE | |
stage_1_num_selected_trials: | |
parameterType: NUMBER_INTEGER | |
stage_1_single_run_max_secs: | |
parameterType: NUMBER_INTEGER | |
stage_2_deadline_hours: | |
parameterType: NUMBER_DOUBLE | |
stage_2_single_run_max_secs: | |
parameterType: NUMBER_INTEGER | |
comp-calculate-training-parameters-2: | |
executorLabel: exec-calculate-training-parameters-2 | |
inputDefinitions: | |
parameters: | |
fast_testing: | |
defaultValue: false | |
description: Internal flag used for presubmit tests. | |
isOptional: true | |
parameterType: BOOLEAN | |
is_skip_architecture_search: | |
defaultValue: false | |
description: 'If component is being called in the | |
skip_architecture_search pipeline.' | |
isOptional: true | |
parameterType: BOOLEAN | |
run_distillation: | |
description: Whether to run distill in the training pipeline. | |
parameterType: BOOLEAN | |
stage_1_num_parallel_trials: | |
description: Number of parallel trials for stage 1.
parameterType: NUMBER_INTEGER | |
stage_2_num_parallel_trials: | |
description: Number of parallel trials for stage 2.
parameterType: NUMBER_INTEGER | |
train_budget_milli_node_hours: | |
description: 'The train budget for creating this model, expressed in milli
node hours, i.e. a value of 1,000 in this field means 1 node hour.'
parameterType: NUMBER_DOUBLE | |
outputDefinitions: | |
parameters: | |
distill_stage_1_deadline_hours: | |
parameterType: NUMBER_DOUBLE | |
reduce_search_space_mode: | |
parameterType: STRING | |
stage_1_deadline_hours: | |
parameterType: NUMBER_DOUBLE | |
stage_1_num_selected_trials: | |
parameterType: NUMBER_INTEGER | |
stage_1_single_run_max_secs: | |
parameterType: NUMBER_INTEGER | |
stage_2_deadline_hours: | |
parameterType: NUMBER_DOUBLE | |
stage_2_single_run_max_secs: | |
parameterType: NUMBER_INTEGER | |
comp-condition-2: | |
dag: | |
outputs: | |
artifacts: | |
feature-attribution-feature_attributions: | |
artifactSelectors: | |
- outputArtifactKey: feature-attribution-feature_attributions | |
producerSubtask: condition-3 | |
model-evaluation-evaluation_metrics: | |
artifactSelectors: | |
- outputArtifactKey: model-evaluation-evaluation_metrics | |
producerSubtask: condition-3 | |
tasks: | |
automl-tabular-cv-trainer: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-automl-tabular-cv-trainer | |
dependentTasks: | |
- calculate-training-parameters | |
- importer | |
inputs: | |
artifacts: | |
materialized_cv_splits: | |
componentInputArtifact: pipelinechannel--merge-materialized-splits-splits | |
metadata: | |
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata | |
transform_output: | |
componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output | |
tuning_result_input: | |
taskOutputArtifact: | |
outputArtifactKey: artifact | |
producerTask: importer | |
parameters: | |
deadline_hours: | |
taskOutputParameter: | |
outputParameterKey: stage_2_deadline_hours | |
producerTask: calculate-training-parameters | |
encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
location: | |
componentInputParameter: pipelinechannel--location | |
num_parallel_trials: | |
componentInputParameter: pipelinechannel--stage_2_num_parallel_trials | |
num_selected_trials: | |
componentInputParameter: pipelinechannel--stage_2_num_selected_trials | |
project: | |
componentInputParameter: pipelinechannel--project | |
root_dir: | |
componentInputParameter: pipelinechannel--root_dir | |
single_run_max_secs: | |
taskOutputParameter: | |
outputParameterKey: stage_2_single_run_max_secs | |
producerTask: calculate-training-parameters | |
worker_pool_specs_override_json: | |
componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override | |
taskInfo: | |
name: automl-tabular-cv-trainer | |
automl-tabular-ensemble: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-automl-tabular-ensemble | |
dependentTasks: | |
- automl-tabular-cv-trainer | |
inputs: | |
artifacts: | |
dataset_schema: | |
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema | |
instance_baseline: | |
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline | |
metadata: | |
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata | |
transform_output: | |
componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output | |
tuning_result_input: | |
taskOutputArtifact: | |
outputArtifactKey: tuning_result_output | |
producerTask: automl-tabular-cv-trainer | |
warmup_data: | |
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split | |
parameters: | |
encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
export_additional_model_without_custom_ops: | |
componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops | |
location: | |
componentInputParameter: pipelinechannel--location | |
project: | |
componentInputParameter: pipelinechannel--project | |
root_dir: | |
componentInputParameter: pipelinechannel--root_dir | |
taskInfo: | |
name: automl-tabular-ensemble | |
automl-tabular-infra-validator: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-automl-tabular-infra-validator | |
dependentTasks: | |
- automl-tabular-ensemble | |
inputs: | |
artifacts: | |
unmanaged_container_model: | |
taskOutputArtifact: | |
outputArtifactKey: unmanaged_container_model | |
producerTask: automl-tabular-ensemble | |
taskInfo: | |
name: automl-tabular-infra-validator | |
bool-identity: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-bool-identity | |
inputs: | |
parameters: | |
value: | |
componentInputParameter: pipelinechannel--run_evaluation | |
taskInfo: | |
name: bool-identity | |
calculate-training-parameters: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-calculate-training-parameters | |
inputs: | |
parameters: | |
fast_testing: | |
componentInputParameter: pipelinechannel--fast_testing | |
is_skip_architecture_search: | |
runtimeValue: | |
constant: 1.0 | |
run_distillation: | |
componentInputParameter: pipelinechannel--run_distillation | |
stage_1_num_parallel_trials: | |
componentInputParameter: pipelinechannel--stage_1_num_parallel_trials | |
stage_2_num_parallel_trials: | |
componentInputParameter: pipelinechannel--stage_2_num_parallel_trials | |
train_budget_milli_node_hours: | |
componentInputParameter: pipelinechannel--train_budget_milli_node_hours | |
taskInfo: | |
name: calculate-training-parameters | |
condition-3: | |
componentRef: | |
name: comp-condition-3 | |
dependentTasks: | |
- automl-tabular-ensemble | |
- bool-identity | |
- model-upload | |
inputs: | |
artifacts: | |
pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact: | |
taskOutputArtifact: | |
outputArtifactKey: explanation_metadata_artifact | |
producerTask: automl-tabular-ensemble | |
pipelinechannel--automl-tabular-ensemble-unmanaged_container_model: | |
taskOutputArtifact: | |
outputArtifactKey: unmanaged_container_model | |
producerTask: automl-tabular-ensemble | |
pipelinechannel--model-upload-model: | |
taskOutputArtifact: | |
outputArtifactKey: model | |
producerTask: model-upload | |
parameters: | |
pipelinechannel--automl-tabular-ensemble-explanation_parameters: | |
taskOutputParameter: | |
outputParameterKey: explanation_parameters | |
producerTask: automl-tabular-ensemble | |
pipelinechannel--bool-identity-Output: | |
taskOutputParameter: | |
outputParameterKey: Output | |
producerTask: bool-identity | |
pipelinechannel--dataflow_service_account: | |
componentInputParameter: pipelinechannel--dataflow_service_account | |
pipelinechannel--dataflow_subnetwork: | |
componentInputParameter: pipelinechannel--dataflow_subnetwork | |
pipelinechannel--dataflow_use_public_ips: | |
componentInputParameter: pipelinechannel--dataflow_use_public_ips | |
pipelinechannel--encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
pipelinechannel--evaluation_batch_explain_machine_type: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type | |
pipelinechannel--evaluation_batch_explain_max_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count | |
pipelinechannel--evaluation_batch_explain_starting_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count | |
pipelinechannel--evaluation_batch_predict_machine_type: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type | |
pipelinechannel--evaluation_batch_predict_max_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count | |
pipelinechannel--evaluation_batch_predict_starting_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count | |
pipelinechannel--evaluation_dataflow_disk_size_gb: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb | |
pipelinechannel--evaluation_dataflow_machine_type: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type | |
pipelinechannel--evaluation_dataflow_max_num_workers: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers | |
pipelinechannel--evaluation_dataflow_starting_num_workers: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers | |
pipelinechannel--location: | |
componentInputParameter: pipelinechannel--location | |
pipelinechannel--prediction_type: | |
componentInputParameter: pipelinechannel--prediction_type | |
pipelinechannel--project: | |
componentInputParameter: pipelinechannel--project | |
pipelinechannel--root_dir: | |
componentInputParameter: pipelinechannel--root_dir | |
pipelinechannel--string-not-empty-Output: | |
componentInputParameter: pipelinechannel--string-not-empty-Output | |
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: | |
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json | |
pipelinechannel--tabular-stats-and-example-gen-test_split_json: | |
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json | |
pipelinechannel--target_column: | |
componentInputParameter: pipelinechannel--target_column | |
taskInfo: | |
name: is-evaluation | |
triggerPolicy: | |
condition: inputs.parameter_values['pipelinechannel--bool-identity-Output'] | |
== 'true' | |
importer: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-importer | |
inputs: | |
parameters: | |
uri: | |
componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri | |
taskInfo: | |
name: importer | |
model-upload: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-model-upload | |
dependentTasks: | |
- automl-tabular-ensemble | |
inputs: | |
artifacts: | |
explanation_metadata_artifact: | |
taskOutputArtifact: | |
outputArtifactKey: explanation_metadata_artifact | |
producerTask: automl-tabular-ensemble | |
parent_model: | |
componentInputArtifact: pipelinechannel--parent_model | |
unmanaged_container_model: | |
taskOutputArtifact: | |
outputArtifactKey: unmanaged_container_model | |
producerTask: automl-tabular-ensemble | |
parameters: | |
description: | |
componentInputParameter: pipelinechannel--model_description | |
display_name: | |
componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name | |
encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
explanation_parameters: | |
taskOutputParameter: | |
outputParameterKey: explanation_parameters | |
producerTask: automl-tabular-ensemble | |
location: | |
componentInputParameter: pipelinechannel--location | |
project: | |
componentInputParameter: pipelinechannel--project | |
taskInfo: | |
name: model-upload | |
inputDefinitions: | |
artifacts: | |
pipelinechannel--automl-tabular-transform-transform_output: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
pipelinechannel--merge-materialized-splits-splits: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
pipelinechannel--parent_model: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
pipelinechannel--tabular-stats-and-example-gen-dataset_schema: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
pipelinechannel--tabular-stats-and-example-gen-eval_split: | |
artifactType: | |
schemaTitle: system.Dataset | |
schemaVersion: 0.0.1 | |
pipelinechannel--tabular-stats-and-example-gen-instance_baseline: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
pipelinechannel--tabular-stats-and-example-gen-metadata: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
parameters: | |
pipelinechannel--cv_trainer_worker_pool_specs_override: | |
parameterType: LIST | |
pipelinechannel--dataflow_service_account: | |
parameterType: STRING | |
pipelinechannel--dataflow_subnetwork: | |
parameterType: STRING | |
pipelinechannel--dataflow_use_public_ips: | |
parameterType: BOOLEAN | |
pipelinechannel--encryption_spec_key_name: | |
parameterType: STRING | |
pipelinechannel--evaluation_batch_explain_machine_type: | |
parameterType: STRING | |
pipelinechannel--evaluation_batch_explain_max_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_batch_explain_starting_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_batch_predict_machine_type: | |
parameterType: STRING | |
pipelinechannel--evaluation_batch_predict_max_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_batch_predict_starting_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_dataflow_disk_size_gb: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_dataflow_machine_type: | |
parameterType: STRING | |
pipelinechannel--evaluation_dataflow_max_num_workers: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_dataflow_starting_num_workers: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--export_additional_model_without_custom_ops: | |
parameterType: BOOLEAN | |
pipelinechannel--fast_testing: | |
parameterType: BOOLEAN | |
pipelinechannel--location: | |
parameterType: STRING | |
pipelinechannel--model_description: | |
parameterType: STRING | |
pipelinechannel--prediction_type: | |
parameterType: STRING | |
pipelinechannel--project: | |
parameterType: STRING | |
pipelinechannel--root_dir: | |
parameterType: STRING | |
pipelinechannel--run_distillation: | |
parameterType: BOOLEAN | |
pipelinechannel--run_evaluation: | |
parameterType: BOOLEAN | |
pipelinechannel--set-optional-inputs-model_display_name: | |
parameterType: STRING | |
pipelinechannel--stage_1_num_parallel_trials: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--stage_1_tuning_result_artifact_uri: | |
parameterType: STRING | |
pipelinechannel--stage_2_num_parallel_trials: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--stage_2_num_selected_trials: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--string-not-empty-Output: | |
parameterType: STRING | |
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: | |
parameterType: LIST | |
pipelinechannel--tabular-stats-and-example-gen-test_split_json: | |
parameterType: LIST | |
pipelinechannel--target_column: | |
parameterType: STRING | |
pipelinechannel--train_budget_milli_node_hours: | |
parameterType: NUMBER_DOUBLE | |
outputDefinitions: | |
artifacts: | |
feature-attribution-feature_attributions: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
model-evaluation-evaluation_metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
comp-condition-3: | |
dag: | |
outputs: | |
artifacts: | |
feature-attribution-feature_attributions: | |
artifactSelectors: | |
- outputArtifactKey: feature_attributions | |
producerSubtask: feature-attribution | |
model-evaluation-evaluation_metrics: | |
artifactSelectors: | |
- outputArtifactKey: evaluation_metrics | |
producerSubtask: model-evaluation | |
tasks: | |
feature-attribution: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-feature-attribution | |
dependentTasks: | |
- model-batch-explanation | |
inputs: | |
artifacts: | |
predictions_gcs_source: | |
taskOutputArtifact: | |
outputArtifactKey: gcs_output_directory | |
producerTask: model-batch-explanation | |
parameters: | |
dataflow_disk_size: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb | |
dataflow_machine_type: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type | |
dataflow_max_workers_num: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers | |
dataflow_service_account: | |
componentInputParameter: pipelinechannel--dataflow_service_account | |
dataflow_subnetwork: | |
componentInputParameter: pipelinechannel--dataflow_subnetwork | |
dataflow_use_public_ips: | |
componentInputParameter: pipelinechannel--dataflow_use_public_ips | |
dataflow_workers_num: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers | |
encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
location: | |
componentInputParameter: pipelinechannel--location | |
predictions_format: | |
runtimeValue: | |
constant: jsonl | |
project: | |
componentInputParameter: pipelinechannel--project | |
taskInfo: | |
name: feature-attribution | |
model-batch-explanation: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-model-batch-explanation | |
inputs: | |
artifacts: | |
explanation_metadata_artifact: | |
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact | |
unmanaged_container_model: | |
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-unmanaged_container_model | |
parameters: | |
encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
explanation_parameters: | |
componentInputParameter: pipelinechannel--automl-tabular-ensemble-explanation_parameters | |
gcs_destination_output_uri_prefix: | |
componentInputParameter: pipelinechannel--root_dir | |
gcs_source_uris: | |
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json | |
generate_explanation: | |
runtimeValue: | |
constant: 1.0 | |
instances_format: | |
runtimeValue: | |
constant: tf-record | |
job_display_name: | |
runtimeValue: | |
constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} | |
location: | |
componentInputParameter: pipelinechannel--location | |
machine_type: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type | |
max_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count | |
predictions_format: | |
runtimeValue: | |
constant: jsonl | |
project: | |
componentInputParameter: pipelinechannel--project | |
starting_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count | |
taskInfo: | |
name: model-batch-explanation | |
model-batch-predict: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-model-batch-predict | |
inputs: | |
artifacts: | |
unmanaged_container_model: | |
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-unmanaged_container_model | |
parameters: | |
encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
gcs_destination_output_uri_prefix: | |
componentInputParameter: pipelinechannel--root_dir | |
gcs_source_uris: | |
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json | |
instances_format: | |
runtimeValue: | |
constant: tf-record | |
job_display_name: | |
runtimeValue: | |
constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} | |
location: | |
componentInputParameter: pipelinechannel--location | |
machine_type: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type | |
max_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count | |
predictions_format: | |
runtimeValue: | |
constant: jsonl | |
project: | |
componentInputParameter: pipelinechannel--project | |
starting_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count | |
taskInfo: | |
name: model-batch-predict | |
model-evaluation: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-model-evaluation | |
dependentTasks: | |
- model-batch-predict | |
inputs: | |
artifacts: | |
batch_prediction_job: | |
taskOutputArtifact: | |
outputArtifactKey: batchpredictionjob | |
producerTask: model-batch-predict | |
parameters: | |
dataflow_disk_size: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb | |
dataflow_machine_type: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type | |
dataflow_max_workers_num: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers | |
dataflow_service_account: | |
componentInputParameter: pipelinechannel--dataflow_service_account | |
dataflow_subnetwork: | |
componentInputParameter: pipelinechannel--dataflow_subnetwork | |
dataflow_use_public_ips: | |
componentInputParameter: pipelinechannel--dataflow_use_public_ips | |
dataflow_workers_num: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers | |
encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
ground_truth_column: | |
componentInputParameter: pipelinechannel--target_column | |
ground_truth_format: | |
runtimeValue: | |
constant: jsonl | |
location: | |
componentInputParameter: pipelinechannel--location | |
prediction_label_column: | |
runtimeValue: | |
constant: '' | |
prediction_score_column: | |
runtimeValue: | |
constant: '' | |
predictions_format: | |
runtimeValue: | |
constant: jsonl | |
problem_type: | |
componentInputParameter: pipelinechannel--prediction_type | |
project: | |
componentInputParameter: pipelinechannel--project | |
root_dir: | |
componentInputParameter: pipelinechannel--root_dir | |
taskInfo: | |
name: model-evaluation | |
model-evaluation-import: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-model-evaluation-import | |
dependentTasks: | |
- feature-attribution | |
- model-evaluation | |
inputs: | |
artifacts: | |
feature_attributions: | |
taskOutputArtifact: | |
outputArtifactKey: feature_attributions | |
producerTask: feature-attribution | |
metrics: | |
taskOutputArtifact: | |
outputArtifactKey: evaluation_metrics | |
producerTask: model-evaluation | |
model: | |
componentInputArtifact: pipelinechannel--model-upload-model | |
parameters: | |
dataset_paths: | |
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json | |
dataset_type: | |
runtimeValue: | |
constant: tf-record | |
display_name: | |
runtimeValue: | |
constant: AutoML Tabular | |
problem_type: | |
componentInputParameter: pipelinechannel--prediction_type | |
taskInfo: | |
name: model-evaluation-import | |
inputDefinitions: | |
artifacts: | |
pipelinechannel--automl-tabular-ensemble-explanation_metadata_artifact: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
pipelinechannel--automl-tabular-ensemble-unmanaged_container_model: | |
artifactType: | |
schemaTitle: google.UnmanagedContainerModel | |
schemaVersion: 0.0.1 | |
pipelinechannel--model-upload-model: | |
artifactType: | |
schemaTitle: google.VertexModel | |
schemaVersion: 0.0.1 | |
parameters: | |
pipelinechannel--automl-tabular-ensemble-explanation_parameters: | |
parameterType: STRUCT | |
pipelinechannel--bool-identity-Output: | |
parameterType: STRING | |
pipelinechannel--dataflow_service_account: | |
parameterType: STRING | |
pipelinechannel--dataflow_subnetwork: | |
parameterType: STRING | |
pipelinechannel--dataflow_use_public_ips: | |
parameterType: BOOLEAN | |
pipelinechannel--encryption_spec_key_name: | |
parameterType: STRING | |
pipelinechannel--evaluation_batch_explain_machine_type: | |
parameterType: STRING | |
pipelinechannel--evaluation_batch_explain_max_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_batch_explain_starting_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_batch_predict_machine_type: | |
parameterType: STRING | |
pipelinechannel--evaluation_batch_predict_max_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_batch_predict_starting_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_dataflow_disk_size_gb: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_dataflow_machine_type: | |
parameterType: STRING | |
pipelinechannel--evaluation_dataflow_max_num_workers: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_dataflow_starting_num_workers: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--location: | |
parameterType: STRING | |
pipelinechannel--prediction_type: | |
parameterType: STRING | |
pipelinechannel--project: | |
parameterType: STRING | |
pipelinechannel--root_dir: | |
parameterType: STRING | |
pipelinechannel--string-not-empty-Output: | |
parameterType: STRING | |
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: | |
parameterType: LIST | |
pipelinechannel--tabular-stats-and-example-gen-test_split_json: | |
parameterType: LIST | |
pipelinechannel--target_column: | |
parameterType: STRING | |
outputDefinitions: | |
artifacts: | |
feature-attribution-feature_attributions: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
model-evaluation-evaluation_metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
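  # comp-condition-4: full training branch. calculate-training-parameters-2 turns
  # train_budget_milli_node_hours and the parallel-trial counts into deadlines and
  # per-run limits; automl-tabular-stage-1-tuner runs the stage 1 architecture
  # search, automl-tabular-cv-trainer-2 cross-validates the selected trials, and
  # automl-tabular-ensemble-2 builds the final model, which
  # automl-tabular-infra-validator-2 checks. The result is then routed to
  # condition-5 ("no-distill") or condition-7 ("is-distill") based on
  # run_distillation.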
comp-condition-4: | |
dag: | |
outputs: | |
artifacts: | |
feature-attribution-2-feature_attributions: | |
artifactSelectors: | |
- outputArtifactKey: feature-attribution-2-feature_attributions | |
producerSubtask: condition-5 | |
feature-attribution-3-feature_attributions: | |
artifactSelectors: | |
- outputArtifactKey: feature-attribution-3-feature_attributions | |
producerSubtask: condition-7 | |
model-evaluation-2-evaluation_metrics: | |
artifactSelectors: | |
- outputArtifactKey: model-evaluation-2-evaluation_metrics | |
producerSubtask: condition-5 | |
model-evaluation-3-evaluation_metrics: | |
artifactSelectors: | |
- outputArtifactKey: model-evaluation-3-evaluation_metrics | |
producerSubtask: condition-7 | |
tasks: | |
automl-tabular-cv-trainer-2: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-automl-tabular-cv-trainer-2 | |
dependentTasks: | |
- automl-tabular-stage-1-tuner | |
- calculate-training-parameters-2 | |
inputs: | |
artifacts: | |
materialized_cv_splits: | |
componentInputArtifact: pipelinechannel--merge-materialized-splits-splits | |
metadata: | |
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata | |
transform_output: | |
componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output | |
tuning_result_input: | |
taskOutputArtifact: | |
outputArtifactKey: tuning_result_output | |
producerTask: automl-tabular-stage-1-tuner | |
parameters: | |
deadline_hours: | |
taskOutputParameter: | |
outputParameterKey: stage_2_deadline_hours | |
producerTask: calculate-training-parameters-2 | |
encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
location: | |
componentInputParameter: pipelinechannel--location | |
num_parallel_trials: | |
componentInputParameter: pipelinechannel--stage_2_num_parallel_trials | |
num_selected_trials: | |
componentInputParameter: pipelinechannel--stage_2_num_selected_trials | |
project: | |
componentInputParameter: pipelinechannel--project | |
root_dir: | |
componentInputParameter: pipelinechannel--root_dir | |
single_run_max_secs: | |
taskOutputParameter: | |
outputParameterKey: stage_2_single_run_max_secs | |
producerTask: calculate-training-parameters-2 | |
worker_pool_specs_override_json: | |
componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override | |
taskInfo: | |
name: automl-tabular-cv-trainer-2 | |
automl-tabular-ensemble-2: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-automl-tabular-ensemble-2 | |
dependentTasks: | |
- automl-tabular-cv-trainer-2 | |
inputs: | |
artifacts: | |
dataset_schema: | |
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema | |
instance_baseline: | |
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline | |
metadata: | |
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata | |
transform_output: | |
componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output | |
tuning_result_input: | |
taskOutputArtifact: | |
outputArtifactKey: tuning_result_output | |
producerTask: automl-tabular-cv-trainer-2 | |
warmup_data: | |
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split | |
parameters: | |
encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
export_additional_model_without_custom_ops: | |
componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops | |
location: | |
componentInputParameter: pipelinechannel--location | |
project: | |
componentInputParameter: pipelinechannel--project | |
root_dir: | |
componentInputParameter: pipelinechannel--root_dir | |
taskInfo: | |
name: automl-tabular-ensemble-2 | |
automl-tabular-infra-validator-2: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-automl-tabular-infra-validator-2 | |
dependentTasks: | |
- automl-tabular-ensemble-2 | |
inputs: | |
artifacts: | |
unmanaged_container_model: | |
taskOutputArtifact: | |
outputArtifactKey: unmanaged_container_model | |
producerTask: automl-tabular-ensemble-2 | |
taskInfo: | |
name: automl-tabular-infra-validator-2 | |
automl-tabular-stage-1-tuner: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-automl-tabular-stage-1-tuner | |
dependentTasks: | |
- calculate-training-parameters-2 | |
inputs: | |
artifacts: | |
materialized_eval_split: | |
componentInputArtifact: pipelinechannel--automl-tabular-transform-materialized_eval_split | |
materialized_train_split: | |
componentInputArtifact: pipelinechannel--automl-tabular-transform-materialized_train_split | |
metadata: | |
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata | |
transform_output: | |
componentInputArtifact: pipelinechannel--automl-tabular-transform-transform_output | |
parameters: | |
deadline_hours: | |
taskOutputParameter: | |
outputParameterKey: stage_1_deadline_hours | |
producerTask: calculate-training-parameters-2 | |
disable_early_stopping: | |
componentInputParameter: pipelinechannel--disable_early_stopping | |
encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
location: | |
componentInputParameter: pipelinechannel--location | |
num_parallel_trials: | |
componentInputParameter: pipelinechannel--stage_1_num_parallel_trials | |
num_selected_trials: | |
taskOutputParameter: | |
outputParameterKey: stage_1_num_selected_trials | |
producerTask: calculate-training-parameters-2 | |
project: | |
componentInputParameter: pipelinechannel--project | |
reduce_search_space_mode: | |
taskOutputParameter: | |
outputParameterKey: reduce_search_space_mode | |
producerTask: calculate-training-parameters-2 | |
root_dir: | |
componentInputParameter: pipelinechannel--root_dir | |
single_run_max_secs: | |
taskOutputParameter: | |
outputParameterKey: stage_1_single_run_max_secs | |
producerTask: calculate-training-parameters-2 | |
study_spec_parameters_override: | |
componentInputParameter: pipelinechannel--study_spec_parameters_override | |
worker_pool_specs_override_json: | |
componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override | |
taskInfo: | |
name: automl-tabular-stage-1-tuner | |
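        # bool-identity-2 and bool-identity-3 pass run_evaluation and
        # run_distillation through as task outputs so the triggerPolicy
        # conditions on condition-5 / condition-7 below can branch on them.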
bool-identity-2: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-bool-identity-2 | |
inputs: | |
parameters: | |
value: | |
componentInputParameter: pipelinechannel--run_evaluation | |
taskInfo: | |
name: bool-identity-2 | |
bool-identity-3: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-bool-identity-3 | |
inputs: | |
parameters: | |
value: | |
componentInputParameter: pipelinechannel--run_distillation | |
taskInfo: | |
name: bool-identity-3 | |
calculate-training-parameters-2: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-calculate-training-parameters-2 | |
inputs: | |
parameters: | |
fast_testing: | |
componentInputParameter: pipelinechannel--fast_testing | |
is_skip_architecture_search: | |
runtimeValue: | |
constant: 0.0 | |
run_distillation: | |
componentInputParameter: pipelinechannel--run_distillation | |
stage_1_num_parallel_trials: | |
componentInputParameter: pipelinechannel--stage_1_num_parallel_trials | |
stage_2_num_parallel_trials: | |
componentInputParameter: pipelinechannel--stage_2_num_parallel_trials | |
train_budget_milli_node_hours: | |
componentInputParameter: pipelinechannel--train_budget_milli_node_hours | |
taskInfo: | |
name: calculate-training-parameters-2 | |
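        # condition-5 ("no-distill"): runs when run_distillation is false;
        # uploads and optionally evaluates the ensemble built in this branch.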
condition-5: | |
componentRef: | |
name: comp-condition-5 | |
dependentTasks: | |
- automl-tabular-ensemble-2 | |
- bool-identity-2 | |
- bool-identity-3 | |
inputs: | |
artifacts: | |
pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: | |
taskOutputArtifact: | |
outputArtifactKey: explanation_metadata_artifact | |
producerTask: automl-tabular-ensemble-2 | |
pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: | |
taskOutputArtifact: | |
outputArtifactKey: unmanaged_container_model | |
producerTask: automl-tabular-ensemble-2 | |
pipelinechannel--parent_model: | |
componentInputArtifact: pipelinechannel--parent_model | |
parameters: | |
pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: | |
taskOutputParameter: | |
outputParameterKey: explanation_parameters | |
producerTask: automl-tabular-ensemble-2 | |
pipelinechannel--bool-identity-2-Output: | |
taskOutputParameter: | |
outputParameterKey: Output | |
producerTask: bool-identity-2 | |
pipelinechannel--bool-identity-3-Output: | |
taskOutputParameter: | |
outputParameterKey: Output | |
producerTask: bool-identity-3 | |
pipelinechannel--dataflow_service_account: | |
componentInputParameter: pipelinechannel--dataflow_service_account | |
pipelinechannel--dataflow_subnetwork: | |
componentInputParameter: pipelinechannel--dataflow_subnetwork | |
pipelinechannel--dataflow_use_public_ips: | |
componentInputParameter: pipelinechannel--dataflow_use_public_ips | |
pipelinechannel--encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
pipelinechannel--evaluation_batch_explain_machine_type: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type | |
pipelinechannel--evaluation_batch_explain_max_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count | |
pipelinechannel--evaluation_batch_explain_starting_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count | |
pipelinechannel--evaluation_batch_predict_machine_type: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type | |
pipelinechannel--evaluation_batch_predict_max_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count | |
pipelinechannel--evaluation_batch_predict_starting_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count | |
pipelinechannel--evaluation_dataflow_disk_size_gb: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb | |
pipelinechannel--evaluation_dataflow_machine_type: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type | |
pipelinechannel--evaluation_dataflow_max_num_workers: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers | |
pipelinechannel--evaluation_dataflow_starting_num_workers: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers | |
pipelinechannel--location: | |
componentInputParameter: pipelinechannel--location | |
pipelinechannel--model_description: | |
componentInputParameter: pipelinechannel--model_description | |
pipelinechannel--prediction_type: | |
componentInputParameter: pipelinechannel--prediction_type | |
pipelinechannel--project: | |
componentInputParameter: pipelinechannel--project | |
pipelinechannel--root_dir: | |
componentInputParameter: pipelinechannel--root_dir | |
pipelinechannel--set-optional-inputs-model_display_name: | |
componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name | |
pipelinechannel--string-not-empty-Output: | |
componentInputParameter: pipelinechannel--string-not-empty-Output | |
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: | |
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json | |
pipelinechannel--tabular-stats-and-example-gen-test_split_json: | |
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json | |
pipelinechannel--target_column: | |
componentInputParameter: pipelinechannel--target_column | |
taskInfo: | |
name: no-distill | |
triggerPolicy: | |
condition: inputs.parameter_values['pipelinechannel--bool-identity-3-Output'] | |
== 'false' | |
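        # condition-7 ("is-distill"): runs when run_distillation is true;
        # trains and packages the distilled model.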
condition-7: | |
componentRef: | |
name: comp-condition-7 | |
dependentTasks: | |
- automl-tabular-ensemble-2 | |
- bool-identity-2 | |
- bool-identity-3 | |
- calculate-training-parameters-2 | |
inputs: | |
artifacts: | |
pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: | |
taskOutputArtifact: | |
outputArtifactKey: unmanaged_container_model | |
producerTask: automl-tabular-ensemble-2 | |
pipelinechannel--tabular-stats-and-example-gen-dataset_schema: | |
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema | |
pipelinechannel--tabular-stats-and-example-gen-eval_split: | |
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split | |
pipelinechannel--tabular-stats-and-example-gen-instance_baseline: | |
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline | |
pipelinechannel--tabular-stats-and-example-gen-metadata: | |
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata | |
pipelinechannel--tabular-stats-and-example-gen-test_split: | |
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-test_split | |
pipelinechannel--tabular-stats-and-example-gen-train_split: | |
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-train_split | |
parameters: | |
pipelinechannel--bool-identity-2-Output: | |
taskOutputParameter: | |
outputParameterKey: Output | |
producerTask: bool-identity-2 | |
pipelinechannel--bool-identity-3-Output: | |
taskOutputParameter: | |
outputParameterKey: Output | |
producerTask: bool-identity-3 | |
pipelinechannel--calculate-training-parameters-2-distill_stage_1_deadline_hours: | |
taskOutputParameter: | |
outputParameterKey: distill_stage_1_deadline_hours | |
producerTask: calculate-training-parameters-2 | |
pipelinechannel--calculate-training-parameters-2-reduce_search_space_mode: | |
taskOutputParameter: | |
outputParameterKey: reduce_search_space_mode | |
producerTask: calculate-training-parameters-2 | |
pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs: | |
taskOutputParameter: | |
outputParameterKey: stage_1_single_run_max_secs | |
producerTask: calculate-training-parameters-2 | |
pipelinechannel--dataflow_service_account: | |
componentInputParameter: pipelinechannel--dataflow_service_account | |
pipelinechannel--dataflow_subnetwork: | |
componentInputParameter: pipelinechannel--dataflow_subnetwork | |
pipelinechannel--dataflow_use_public_ips: | |
componentInputParameter: pipelinechannel--dataflow_use_public_ips | |
pipelinechannel--disable_early_stopping: | |
componentInputParameter: pipelinechannel--disable_early_stopping | |
pipelinechannel--distill_batch_predict_machine_type: | |
componentInputParameter: pipelinechannel--distill_batch_predict_machine_type | |
pipelinechannel--distill_batch_predict_max_replica_count: | |
componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count | |
pipelinechannel--distill_batch_predict_starting_replica_count: | |
componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count | |
pipelinechannel--encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
pipelinechannel--evaluation_batch_explain_machine_type: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type | |
pipelinechannel--evaluation_batch_explain_max_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count | |
pipelinechannel--evaluation_batch_explain_starting_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count | |
pipelinechannel--evaluation_batch_predict_machine_type: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type | |
pipelinechannel--evaluation_batch_predict_max_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count | |
pipelinechannel--evaluation_batch_predict_starting_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count | |
pipelinechannel--evaluation_dataflow_disk_size_gb: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb | |
pipelinechannel--evaluation_dataflow_machine_type: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type | |
pipelinechannel--evaluation_dataflow_max_num_workers: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers | |
pipelinechannel--evaluation_dataflow_starting_num_workers: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers | |
pipelinechannel--export_additional_model_without_custom_ops: | |
componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops | |
pipelinechannel--location: | |
componentInputParameter: pipelinechannel--location | |
pipelinechannel--prediction_type: | |
componentInputParameter: pipelinechannel--prediction_type | |
pipelinechannel--project: | |
componentInputParameter: pipelinechannel--project | |
pipelinechannel--root_dir: | |
componentInputParameter: pipelinechannel--root_dir | |
pipelinechannel--stage_1_num_parallel_trials: | |
componentInputParameter: pipelinechannel--stage_1_num_parallel_trials | |
pipelinechannel--stage_1_tuner_worker_pool_specs_override: | |
componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override | |
pipelinechannel--string-not-empty-Output: | |
componentInputParameter: pipelinechannel--string-not-empty-Output | |
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: | |
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json | |
pipelinechannel--tabular-stats-and-example-gen-test_split_json: | |
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json | |
pipelinechannel--target_column: | |
componentInputParameter: pipelinechannel--target_column | |
pipelinechannel--transform_dataflow_disk_size_gb: | |
componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb | |
pipelinechannel--transform_dataflow_machine_type: | |
componentInputParameter: pipelinechannel--transform_dataflow_machine_type | |
pipelinechannel--transform_dataflow_max_num_workers: | |
componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers | |
taskInfo: | |
name: is-distill | |
triggerPolicy: | |
condition: inputs.parameter_values['pipelinechannel--bool-identity-3-Output'] | |
== 'true' | |
inputDefinitions: | |
artifacts: | |
pipelinechannel--automl-tabular-transform-materialized_eval_split: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
pipelinechannel--automl-tabular-transform-materialized_train_split: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
pipelinechannel--automl-tabular-transform-transform_output: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
pipelinechannel--merge-materialized-splits-splits: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
pipelinechannel--parent_model: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
pipelinechannel--tabular-stats-and-example-gen-dataset_schema: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
pipelinechannel--tabular-stats-and-example-gen-eval_split: | |
artifactType: | |
schemaTitle: system.Dataset | |
schemaVersion: 0.0.1 | |
pipelinechannel--tabular-stats-and-example-gen-instance_baseline: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
pipelinechannel--tabular-stats-and-example-gen-metadata: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
pipelinechannel--tabular-stats-and-example-gen-test_split: | |
artifactType: | |
schemaTitle: system.Dataset | |
schemaVersion: 0.0.1 | |
pipelinechannel--tabular-stats-and-example-gen-train_split: | |
artifactType: | |
schemaTitle: system.Dataset | |
schemaVersion: 0.0.1 | |
parameters: | |
pipelinechannel--cv_trainer_worker_pool_specs_override: | |
parameterType: LIST | |
pipelinechannel--dataflow_service_account: | |
parameterType: STRING | |
pipelinechannel--dataflow_subnetwork: | |
parameterType: STRING | |
pipelinechannel--dataflow_use_public_ips: | |
parameterType: BOOLEAN | |
pipelinechannel--disable_early_stopping: | |
parameterType: BOOLEAN | |
pipelinechannel--distill_batch_predict_machine_type: | |
parameterType: STRING | |
pipelinechannel--distill_batch_predict_max_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--distill_batch_predict_starting_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--encryption_spec_key_name: | |
parameterType: STRING | |
pipelinechannel--evaluation_batch_explain_machine_type: | |
parameterType: STRING | |
pipelinechannel--evaluation_batch_explain_max_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_batch_explain_starting_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_batch_predict_machine_type: | |
parameterType: STRING | |
pipelinechannel--evaluation_batch_predict_max_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_batch_predict_starting_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_dataflow_disk_size_gb: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_dataflow_machine_type: | |
parameterType: STRING | |
pipelinechannel--evaluation_dataflow_max_num_workers: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_dataflow_starting_num_workers: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--export_additional_model_without_custom_ops: | |
parameterType: BOOLEAN | |
pipelinechannel--fast_testing: | |
parameterType: BOOLEAN | |
pipelinechannel--location: | |
parameterType: STRING | |
pipelinechannel--model_description: | |
parameterType: STRING | |
pipelinechannel--prediction_type: | |
parameterType: STRING | |
pipelinechannel--project: | |
parameterType: STRING | |
pipelinechannel--root_dir: | |
parameterType: STRING | |
pipelinechannel--run_distillation: | |
parameterType: BOOLEAN | |
pipelinechannel--run_evaluation: | |
parameterType: BOOLEAN | |
pipelinechannel--set-optional-inputs-model_display_name: | |
parameterType: STRING | |
pipelinechannel--stage_1_num_parallel_trials: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--stage_1_tuner_worker_pool_specs_override: | |
parameterType: LIST | |
pipelinechannel--stage_2_num_parallel_trials: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--stage_2_num_selected_trials: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--string-not-empty-Output: | |
parameterType: STRING | |
pipelinechannel--study_spec_parameters_override: | |
parameterType: LIST | |
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: | |
parameterType: LIST | |
pipelinechannel--tabular-stats-and-example-gen-test_split_json: | |
parameterType: LIST | |
pipelinechannel--target_column: | |
parameterType: STRING | |
pipelinechannel--train_budget_milli_node_hours: | |
parameterType: NUMBER_DOUBLE | |
pipelinechannel--transform_dataflow_disk_size_gb: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--transform_dataflow_machine_type: | |
parameterType: STRING | |
pipelinechannel--transform_dataflow_max_num_workers: | |
parameterType: NUMBER_INTEGER | |
outputDefinitions: | |
artifacts: | |
feature-attribution-2-feature_attributions: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
feature-attribution-3-feature_attributions: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
model-evaluation-2-evaluation_metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
model-evaluation-3-evaluation_metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
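  # comp-condition-5: "no-distill" path. model-upload-2 uploads the
  # automl-tabular-ensemble-2 model to Vertex AI; condition-6 ("is-evaluation")
  # runs only when bool-identity-2 (run_evaluation) is 'true'.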
comp-condition-5: | |
dag: | |
outputs: | |
artifacts: | |
feature-attribution-2-feature_attributions: | |
artifactSelectors: | |
- outputArtifactKey: feature-attribution-2-feature_attributions | |
producerSubtask: condition-6 | |
model-evaluation-2-evaluation_metrics: | |
artifactSelectors: | |
- outputArtifactKey: model-evaluation-2-evaluation_metrics | |
producerSubtask: condition-6 | |
tasks: | |
condition-6: | |
componentRef: | |
name: comp-condition-6 | |
dependentTasks: | |
- model-upload-2 | |
inputs: | |
artifacts: | |
pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: | |
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact | |
pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: | |
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model | |
pipelinechannel--model-upload-2-model: | |
taskOutputArtifact: | |
outputArtifactKey: model | |
producerTask: model-upload-2 | |
parameters: | |
pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: | |
componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters | |
pipelinechannel--bool-identity-2-Output: | |
componentInputParameter: pipelinechannel--bool-identity-2-Output | |
pipelinechannel--bool-identity-3-Output: | |
componentInputParameter: pipelinechannel--bool-identity-3-Output | |
pipelinechannel--dataflow_service_account: | |
componentInputParameter: pipelinechannel--dataflow_service_account | |
pipelinechannel--dataflow_subnetwork: | |
componentInputParameter: pipelinechannel--dataflow_subnetwork | |
pipelinechannel--dataflow_use_public_ips: | |
componentInputParameter: pipelinechannel--dataflow_use_public_ips | |
pipelinechannel--encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
pipelinechannel--evaluation_batch_explain_machine_type: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type | |
pipelinechannel--evaluation_batch_explain_max_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count | |
pipelinechannel--evaluation_batch_explain_starting_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count | |
pipelinechannel--evaluation_batch_predict_machine_type: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type | |
pipelinechannel--evaluation_batch_predict_max_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count | |
pipelinechannel--evaluation_batch_predict_starting_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count | |
pipelinechannel--evaluation_dataflow_disk_size_gb: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb | |
pipelinechannel--evaluation_dataflow_machine_type: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type | |
pipelinechannel--evaluation_dataflow_max_num_workers: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers | |
pipelinechannel--evaluation_dataflow_starting_num_workers: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers | |
pipelinechannel--location: | |
componentInputParameter: pipelinechannel--location | |
pipelinechannel--prediction_type: | |
componentInputParameter: pipelinechannel--prediction_type | |
pipelinechannel--project: | |
componentInputParameter: pipelinechannel--project | |
pipelinechannel--root_dir: | |
componentInputParameter: pipelinechannel--root_dir | |
pipelinechannel--string-not-empty-Output: | |
componentInputParameter: pipelinechannel--string-not-empty-Output | |
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: | |
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json | |
pipelinechannel--tabular-stats-and-example-gen-test_split_json: | |
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json | |
pipelinechannel--target_column: | |
componentInputParameter: pipelinechannel--target_column | |
taskInfo: | |
name: is-evaluation | |
triggerPolicy: | |
condition: inputs.parameter_values['pipelinechannel--bool-identity-2-Output'] | |
== 'true' | |
model-upload-2: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-model-upload-2 | |
inputs: | |
artifacts: | |
explanation_metadata_artifact: | |
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact | |
parent_model: | |
componentInputArtifact: pipelinechannel--parent_model | |
unmanaged_container_model: | |
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model | |
parameters: | |
description: | |
componentInputParameter: pipelinechannel--model_description | |
display_name: | |
componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name | |
encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
explanation_parameters: | |
componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters | |
location: | |
componentInputParameter: pipelinechannel--location | |
project: | |
componentInputParameter: pipelinechannel--project | |
taskInfo: | |
name: model-upload-2 | |
inputDefinitions: | |
artifacts: | |
pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: | |
artifactType: | |
schemaTitle: google.UnmanagedContainerModel | |
schemaVersion: 0.0.1 | |
pipelinechannel--parent_model: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
parameters: | |
pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: | |
parameterType: STRUCT | |
pipelinechannel--bool-identity-2-Output: | |
parameterType: STRING | |
pipelinechannel--bool-identity-3-Output: | |
parameterType: STRING | |
pipelinechannel--dataflow_service_account: | |
parameterType: STRING | |
pipelinechannel--dataflow_subnetwork: | |
parameterType: STRING | |
pipelinechannel--dataflow_use_public_ips: | |
parameterType: BOOLEAN | |
pipelinechannel--encryption_spec_key_name: | |
parameterType: STRING | |
pipelinechannel--evaluation_batch_explain_machine_type: | |
parameterType: STRING | |
pipelinechannel--evaluation_batch_explain_max_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_batch_explain_starting_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_batch_predict_machine_type: | |
parameterType: STRING | |
pipelinechannel--evaluation_batch_predict_max_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_batch_predict_starting_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_dataflow_disk_size_gb: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_dataflow_machine_type: | |
parameterType: STRING | |
pipelinechannel--evaluation_dataflow_max_num_workers: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_dataflow_starting_num_workers: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--location: | |
parameterType: STRING | |
pipelinechannel--model_description: | |
parameterType: STRING | |
pipelinechannel--prediction_type: | |
parameterType: STRING | |
pipelinechannel--project: | |
parameterType: STRING | |
pipelinechannel--root_dir: | |
parameterType: STRING | |
pipelinechannel--set-optional-inputs-model_display_name: | |
parameterType: STRING | |
pipelinechannel--string-not-empty-Output: | |
parameterType: STRING | |
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: | |
parameterType: LIST | |
pipelinechannel--tabular-stats-and-example-gen-test_split_json: | |
parameterType: LIST | |
pipelinechannel--target_column: | |
parameterType: STRING | |
outputDefinitions: | |
artifacts: | |
feature-attribution-2-feature_attributions: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
model-evaluation-2-evaluation_metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
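  # comp-condition-6: evaluation sub-DAG for the non-distilled model, structured
  # like comp-condition-3: model-batch-explanation-2 -> feature-attribution-2,
  # model-batch-predict-2 -> model-evaluation-2, then model-evaluation-import-2
  # attaches both results to the model uploaded by model-upload-2.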
comp-condition-6: | |
dag: | |
outputs: | |
artifacts: | |
feature-attribution-2-feature_attributions: | |
artifactSelectors: | |
- outputArtifactKey: feature_attributions | |
producerSubtask: feature-attribution-2 | |
model-evaluation-2-evaluation_metrics: | |
artifactSelectors: | |
- outputArtifactKey: evaluation_metrics | |
producerSubtask: model-evaluation-2 | |
tasks: | |
feature-attribution-2: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-feature-attribution-2 | |
dependentTasks: | |
- model-batch-explanation-2 | |
inputs: | |
artifacts: | |
predictions_gcs_source: | |
taskOutputArtifact: | |
outputArtifactKey: gcs_output_directory | |
producerTask: model-batch-explanation-2 | |
parameters: | |
dataflow_disk_size: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb | |
dataflow_machine_type: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type | |
dataflow_max_workers_num: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers | |
dataflow_service_account: | |
componentInputParameter: pipelinechannel--dataflow_service_account | |
dataflow_subnetwork: | |
componentInputParameter: pipelinechannel--dataflow_subnetwork | |
dataflow_use_public_ips: | |
componentInputParameter: pipelinechannel--dataflow_use_public_ips | |
dataflow_workers_num: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers | |
encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
location: | |
componentInputParameter: pipelinechannel--location | |
predictions_format: | |
runtimeValue: | |
constant: jsonl | |
project: | |
componentInputParameter: pipelinechannel--project | |
taskInfo: | |
name: feature-attribution-2 | |
model-batch-explanation-2: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-model-batch-explanation-2 | |
inputs: | |
artifacts: | |
explanation_metadata_artifact: | |
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact | |
unmanaged_container_model: | |
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model | |
parameters: | |
encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
explanation_parameters: | |
componentInputParameter: pipelinechannel--automl-tabular-ensemble-2-explanation_parameters | |
gcs_destination_output_uri_prefix: | |
componentInputParameter: pipelinechannel--root_dir | |
gcs_source_uris: | |
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json | |
generate_explanation: | |
runtimeValue: | |
constant: 1.0 | |
instances_format: | |
runtimeValue: | |
constant: tf-record | |
job_display_name: | |
runtimeValue: | |
constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} | |
location: | |
componentInputParameter: pipelinechannel--location | |
machine_type: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type | |
max_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count | |
predictions_format: | |
runtimeValue: | |
constant: jsonl | |
project: | |
componentInputParameter: pipelinechannel--project | |
starting_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count | |
taskInfo: | |
name: model-batch-explanation-2 | |
model-batch-predict-2: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-model-batch-predict-2 | |
inputs: | |
artifacts: | |
unmanaged_container_model: | |
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model | |
parameters: | |
encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
gcs_destination_output_uri_prefix: | |
componentInputParameter: pipelinechannel--root_dir | |
gcs_source_uris: | |
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json | |
instances_format: | |
runtimeValue: | |
constant: tf-record | |
job_display_name: | |
runtimeValue: | |
constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} | |
location: | |
componentInputParameter: pipelinechannel--location | |
machine_type: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type | |
max_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count | |
predictions_format: | |
runtimeValue: | |
constant: jsonl | |
project: | |
componentInputParameter: pipelinechannel--project | |
starting_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count | |
taskInfo: | |
name: model-batch-predict-2 | |
model-evaluation-2: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-model-evaluation-2 | |
dependentTasks: | |
- model-batch-predict-2 | |
inputs: | |
artifacts: | |
batch_prediction_job: | |
taskOutputArtifact: | |
outputArtifactKey: batchpredictionjob | |
producerTask: model-batch-predict-2 | |
parameters: | |
dataflow_disk_size: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb | |
dataflow_machine_type: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type | |
dataflow_max_workers_num: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers | |
dataflow_service_account: | |
componentInputParameter: pipelinechannel--dataflow_service_account | |
dataflow_subnetwork: | |
componentInputParameter: pipelinechannel--dataflow_subnetwork | |
dataflow_use_public_ips: | |
componentInputParameter: pipelinechannel--dataflow_use_public_ips | |
dataflow_workers_num: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers | |
encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
ground_truth_column: | |
componentInputParameter: pipelinechannel--target_column | |
ground_truth_format: | |
runtimeValue: | |
constant: jsonl | |
location: | |
componentInputParameter: pipelinechannel--location | |
prediction_label_column: | |
runtimeValue: | |
constant: '' | |
prediction_score_column: | |
runtimeValue: | |
constant: '' | |
predictions_format: | |
runtimeValue: | |
constant: jsonl | |
problem_type: | |
componentInputParameter: pipelinechannel--prediction_type | |
project: | |
componentInputParameter: pipelinechannel--project | |
root_dir: | |
componentInputParameter: pipelinechannel--root_dir | |
taskInfo: | |
name: model-evaluation-2 | |
model-evaluation-import-2: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-model-evaluation-import-2 | |
dependentTasks: | |
- feature-attribution-2 | |
- model-evaluation-2 | |
inputs: | |
artifacts: | |
feature_attributions: | |
taskOutputArtifact: | |
outputArtifactKey: feature_attributions | |
producerTask: feature-attribution-2 | |
metrics: | |
taskOutputArtifact: | |
outputArtifactKey: evaluation_metrics | |
producerTask: model-evaluation-2 | |
model: | |
componentInputArtifact: pipelinechannel--model-upload-2-model | |
parameters: | |
dataset_paths: | |
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json | |
dataset_type: | |
runtimeValue: | |
constant: tf-record | |
display_name: | |
runtimeValue: | |
constant: AutoML Tabular | |
problem_type: | |
componentInputParameter: pipelinechannel--prediction_type | |
taskInfo: | |
name: model-evaluation-import-2 | |
inputDefinitions: | |
artifacts: | |
pipelinechannel--automl-tabular-ensemble-2-explanation_metadata_artifact: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: | |
artifactType: | |
schemaTitle: google.UnmanagedContainerModel | |
schemaVersion: 0.0.1 | |
pipelinechannel--model-upload-2-model: | |
artifactType: | |
schemaTitle: google.VertexModel | |
schemaVersion: 0.0.1 | |
parameters: | |
pipelinechannel--automl-tabular-ensemble-2-explanation_parameters: | |
parameterType: STRUCT | |
pipelinechannel--bool-identity-2-Output: | |
parameterType: STRING | |
pipelinechannel--bool-identity-3-Output: | |
parameterType: STRING | |
pipelinechannel--dataflow_service_account: | |
parameterType: STRING | |
pipelinechannel--dataflow_subnetwork: | |
parameterType: STRING | |
pipelinechannel--dataflow_use_public_ips: | |
parameterType: BOOLEAN | |
pipelinechannel--encryption_spec_key_name: | |
parameterType: STRING | |
pipelinechannel--evaluation_batch_explain_machine_type: | |
parameterType: STRING | |
pipelinechannel--evaluation_batch_explain_max_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_batch_explain_starting_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_batch_predict_machine_type: | |
parameterType: STRING | |
pipelinechannel--evaluation_batch_predict_max_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_batch_predict_starting_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_dataflow_disk_size_gb: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_dataflow_machine_type: | |
parameterType: STRING | |
pipelinechannel--evaluation_dataflow_max_num_workers: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_dataflow_starting_num_workers: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--location: | |
parameterType: STRING | |
pipelinechannel--prediction_type: | |
parameterType: STRING | |
pipelinechannel--project: | |
parameterType: STRING | |
pipelinechannel--root_dir: | |
parameterType: STRING | |
pipelinechannel--string-not-empty-Output: | |
parameterType: STRING | |
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: | |
parameterType: LIST | |
pipelinechannel--tabular-stats-and-example-gen-test_split_json: | |
parameterType: LIST | |
pipelinechannel--target_column: | |
parameterType: STRING | |
outputDefinitions: | |
artifacts: | |
feature-attribution-2-feature_attributions: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
model-evaluation-2-evaluation_metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
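  # comp-condition-7: distillation path. automl-tabular-transform-2 re-materializes
  # train/eval splits from the paths written by write-bp-result-path and
  # write-bp-result-path-2 (likely the teacher model's batch-prediction outputs);
  # automl-tabular-stage-1-tuner-2 trains a single trial with run_distillation
  # enabled (num_selected_trials: 1), automl-tabular-ensemble-3 packages it,
  # model-upload-3 uploads it, and condition-8 handles its evaluation.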
comp-condition-7: | |
dag: | |
outputs: | |
artifacts: | |
feature-attribution-3-feature_attributions: | |
artifactSelectors: | |
- outputArtifactKey: feature-attribution-3-feature_attributions | |
producerSubtask: condition-8 | |
model-evaluation-3-evaluation_metrics: | |
artifactSelectors: | |
- outputArtifactKey: model-evaluation-3-evaluation_metrics | |
producerSubtask: condition-8 | |
tasks: | |
automl-tabular-ensemble-3: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-automl-tabular-ensemble-3 | |
dependentTasks: | |
- automl-tabular-stage-1-tuner-2 | |
- automl-tabular-transform-2 | |
inputs: | |
artifacts: | |
dataset_schema: | |
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema | |
instance_baseline: | |
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-instance_baseline | |
metadata: | |
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata | |
transform_output: | |
taskOutputArtifact: | |
outputArtifactKey: transform_output | |
producerTask: automl-tabular-transform-2 | |
tuning_result_input: | |
taskOutputArtifact: | |
outputArtifactKey: tuning_result_output | |
producerTask: automl-tabular-stage-1-tuner-2 | |
warmup_data: | |
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split | |
parameters: | |
encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
export_additional_model_without_custom_ops: | |
componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops | |
location: | |
componentInputParameter: pipelinechannel--location | |
project: | |
componentInputParameter: pipelinechannel--project | |
root_dir: | |
componentInputParameter: pipelinechannel--root_dir | |
taskInfo: | |
name: automl-tabular-ensemble-3 | |
automl-tabular-infra-validator-3: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-automl-tabular-infra-validator-3 | |
dependentTasks: | |
- automl-tabular-ensemble-3 | |
inputs: | |
artifacts: | |
unmanaged_container_model: | |
taskOutputArtifact: | |
outputArtifactKey: unmanaged_container_model | |
producerTask: automl-tabular-ensemble-3 | |
taskInfo: | |
name: automl-tabular-infra-validator-3 | |
automl-tabular-stage-1-tuner-2: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-automl-tabular-stage-1-tuner-2 | |
dependentTasks: | |
- automl-tabular-transform-2 | |
inputs: | |
artifacts: | |
materialized_eval_split: | |
taskOutputArtifact: | |
outputArtifactKey: materialized_eval_split | |
producerTask: automl-tabular-transform-2 | |
materialized_train_split: | |
taskOutputArtifact: | |
outputArtifactKey: materialized_train_split | |
producerTask: automl-tabular-transform-2 | |
metadata: | |
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata | |
transform_output: | |
taskOutputArtifact: | |
outputArtifactKey: transform_output | |
producerTask: automl-tabular-transform-2 | |
parameters: | |
deadline_hours: | |
componentInputParameter: pipelinechannel--calculate-training-parameters-2-distill_stage_1_deadline_hours | |
disable_early_stopping: | |
componentInputParameter: pipelinechannel--disable_early_stopping | |
encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
location: | |
componentInputParameter: pipelinechannel--location | |
num_parallel_trials: | |
componentInputParameter: pipelinechannel--stage_1_num_parallel_trials | |
num_selected_trials: | |
runtimeValue: | |
constant: 1.0 | |
project: | |
componentInputParameter: pipelinechannel--project | |
reduce_search_space_mode: | |
componentInputParameter: pipelinechannel--calculate-training-parameters-2-reduce_search_space_mode | |
root_dir: | |
componentInputParameter: pipelinechannel--root_dir | |
run_distillation: | |
runtimeValue: | |
constant: 1.0 | |
single_run_max_secs: | |
componentInputParameter: pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs | |
worker_pool_specs_override_json: | |
componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override | |
taskInfo: | |
name: automl-tabular-stage-1-tuner-2 | |
automl-tabular-transform-2: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-automl-tabular-transform-2 | |
dependentTasks: | |
- write-bp-result-path | |
- write-bp-result-path-2 | |
inputs: | |
artifacts: | |
dataset_schema: | |
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-dataset_schema | |
eval_split: | |
taskOutputArtifact: | |
outputArtifactKey: result | |
producerTask: write-bp-result-path-2 | |
metadata: | |
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-metadata | |
test_split: | |
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-test_split | |
train_split: | |
taskOutputArtifact: | |
outputArtifactKey: result | |
producerTask: write-bp-result-path | |
parameters: | |
dataflow_disk_size_gb: | |
componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb | |
dataflow_machine_type: | |
componentInputParameter: pipelinechannel--transform_dataflow_machine_type | |
dataflow_max_num_workers: | |
componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers | |
dataflow_service_account: | |
componentInputParameter: pipelinechannel--dataflow_service_account | |
dataflow_subnetwork: | |
componentInputParameter: pipelinechannel--dataflow_subnetwork | |
encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
location: | |
componentInputParameter: pipelinechannel--location | |
project: | |
componentInputParameter: pipelinechannel--project | |
root_dir: | |
componentInputParameter: pipelinechannel--root_dir | |
taskInfo: | |
name: automl-tabular-transform-2 | |
condition-8: | |
componentRef: | |
name: comp-condition-8 | |
dependentTasks: | |
- automl-tabular-ensemble-3 | |
- model-upload-3 | |
inputs: | |
artifacts: | |
pipelinechannel--automl-tabular-ensemble-3-explanation_metadata_artifact: | |
taskOutputArtifact: | |
outputArtifactKey: explanation_metadata_artifact | |
producerTask: automl-tabular-ensemble-3 | |
pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model: | |
taskOutputArtifact: | |
outputArtifactKey: unmanaged_container_model | |
producerTask: automl-tabular-ensemble-3 | |
pipelinechannel--model-upload-3-model: | |
taskOutputArtifact: | |
outputArtifactKey: model | |
producerTask: model-upload-3 | |
parameters: | |
pipelinechannel--automl-tabular-ensemble-3-explanation_parameters: | |
taskOutputParameter: | |
outputParameterKey: explanation_parameters | |
producerTask: automl-tabular-ensemble-3 | |
pipelinechannel--bool-identity-2-Output: | |
componentInputParameter: pipelinechannel--bool-identity-2-Output | |
pipelinechannel--bool-identity-3-Output: | |
componentInputParameter: pipelinechannel--bool-identity-3-Output | |
pipelinechannel--dataflow_service_account: | |
componentInputParameter: pipelinechannel--dataflow_service_account | |
pipelinechannel--dataflow_subnetwork: | |
componentInputParameter: pipelinechannel--dataflow_subnetwork | |
pipelinechannel--dataflow_use_public_ips: | |
componentInputParameter: pipelinechannel--dataflow_use_public_ips | |
pipelinechannel--encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
pipelinechannel--evaluation_batch_explain_machine_type: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type | |
pipelinechannel--evaluation_batch_explain_max_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count | |
pipelinechannel--evaluation_batch_explain_starting_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count | |
pipelinechannel--evaluation_batch_predict_machine_type: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type | |
pipelinechannel--evaluation_batch_predict_max_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count | |
pipelinechannel--evaluation_batch_predict_starting_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count | |
pipelinechannel--evaluation_dataflow_disk_size_gb: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb | |
pipelinechannel--evaluation_dataflow_machine_type: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type | |
pipelinechannel--evaluation_dataflow_max_num_workers: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers | |
pipelinechannel--evaluation_dataflow_starting_num_workers: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers | |
pipelinechannel--location: | |
componentInputParameter: pipelinechannel--location | |
pipelinechannel--prediction_type: | |
componentInputParameter: pipelinechannel--prediction_type | |
pipelinechannel--project: | |
componentInputParameter: pipelinechannel--project | |
pipelinechannel--root_dir: | |
componentInputParameter: pipelinechannel--root_dir | |
pipelinechannel--string-not-empty-Output: | |
componentInputParameter: pipelinechannel--string-not-empty-Output | |
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: | |
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json | |
pipelinechannel--tabular-stats-and-example-gen-test_split_json: | |
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json | |
pipelinechannel--target_column: | |
componentInputParameter: pipelinechannel--target_column | |
taskInfo: | |
name: is-evaluation | |
triggerPolicy: | |
condition: inputs.parameter_values['pipelinechannel--bool-identity-2-Output'] | |
== 'true' | |
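# The triggerPolicy above gates this sub-DAG (taskInfo name "is-evaluation") on the
# 'true' value of the bool-identity-2 output, which appears to carry the pipeline's
# run_evaluation flag. A minimal, hedged KFP SDK sketch of authoring an equivalent
# gate -- bool_identity and evaluate_model are illustrative names, not taken from
# this file:
#
#   from kfp import dsl
#
#   @dsl.pipeline(name="gated-eval-example")
#   def pipeline(run_evaluation: bool = False):
#       flag = bool_identity(value=run_evaluation)      # hypothetical component
#       with dsl.Condition(flag.output == 'true'):      # compiles to a triggerPolicy
#           evaluate_model()                            # hypothetical component
#
# Newer KFP SDKs expose the same construct as dsl.If.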
model-batch-predict-3: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-model-batch-predict-3 | |
dependentTasks: | |
- read-input-uri | |
inputs: | |
artifacts: | |
unmanaged_container_model: | |
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model | |
parameters: | |
encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
gcs_destination_output_uri_prefix: | |
componentInputParameter: pipelinechannel--root_dir | |
gcs_source_uris: | |
taskOutputParameter: | |
outputParameterKey: Output | |
producerTask: read-input-uri | |
instances_format: | |
runtimeValue: | |
constant: tf-record | |
job_display_name: | |
runtimeValue: | |
constant: batch-predict-train-split | |
location: | |
componentInputParameter: pipelinechannel--location | |
machine_type: | |
componentInputParameter: pipelinechannel--distill_batch_predict_machine_type | |
max_replica_count: | |
componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count | |
predictions_format: | |
runtimeValue: | |
constant: tf-record | |
project: | |
componentInputParameter: pipelinechannel--project | |
starting_replica_count: | |
componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count | |
taskInfo: | |
name: model-batch-predict-3 | |
model-batch-predict-4: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-model-batch-predict-4 | |
dependentTasks: | |
- read-input-uri-2 | |
inputs: | |
artifacts: | |
unmanaged_container_model: | |
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model | |
parameters: | |
encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
gcs_destination_output_uri_prefix: | |
componentInputParameter: pipelinechannel--root_dir | |
gcs_source_uris: | |
taskOutputParameter: | |
outputParameterKey: Output | |
producerTask: read-input-uri-2 | |
instances_format: | |
runtimeValue: | |
constant: tf-record | |
job_display_name: | |
runtimeValue: | |
constant: batch-predict-eval-split | |
location: | |
componentInputParameter: pipelinechannel--location | |
machine_type: | |
componentInputParameter: pipelinechannel--distill_batch_predict_machine_type | |
max_replica_count: | |
componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count | |
predictions_format: | |
runtimeValue: | |
constant: tf-record | |
project: | |
componentInputParameter: pipelinechannel--project | |
starting_replica_count: | |
componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count | |
taskInfo: | |
name: model-batch-predict-4 | |
model-upload-3: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-model-upload-3 | |
dependentTasks: | |
- automl-tabular-ensemble-3 | |
- automl-tabular-infra-validator-3 | |
inputs: | |
artifacts: | |
explanation_metadata_artifact: | |
taskOutputArtifact: | |
outputArtifactKey: explanation_metadata_artifact | |
producerTask: automl-tabular-ensemble-3 | |
unmanaged_container_model: | |
taskOutputArtifact: | |
outputArtifactKey: unmanaged_container_model | |
producerTask: automl-tabular-ensemble-3 | |
parameters: | |
display_name: | |
runtimeValue: | |
constant: automl-tabular-distill-model-upload-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} | |
encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
explanation_parameters: | |
taskOutputParameter: | |
outputParameterKey: explanation_parameters | |
producerTask: automl-tabular-ensemble-3 | |
location: | |
componentInputParameter: pipelinechannel--location | |
project: | |
componentInputParameter: pipelinechannel--project | |
taskInfo: | |
name: model-upload-3 | |
read-input-uri: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-read-input-uri | |
inputs: | |
artifacts: | |
split_uri: | |
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-train_split | |
taskInfo: | |
name: read-input-uri | |
read-input-uri-2: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-read-input-uri-2 | |
inputs: | |
artifacts: | |
split_uri: | |
componentInputArtifact: pipelinechannel--tabular-stats-and-example-gen-eval_split | |
taskInfo: | |
name: read-input-uri-2 | |
write-bp-result-path: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-write-bp-result-path | |
dependentTasks: | |
- model-batch-predict-3 | |
inputs: | |
artifacts: | |
bp_job: | |
taskOutputArtifact: | |
outputArtifactKey: batchpredictionjob | |
producerTask: model-batch-predict-3 | |
taskInfo: | |
name: write-bp-result-path | |
write-bp-result-path-2: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-write-bp-result-path-2 | |
dependentTasks: | |
- model-batch-predict-4 | |
inputs: | |
artifacts: | |
bp_job: | |
taskOutputArtifact: | |
outputArtifactKey: batchpredictionjob | |
producerTask: model-batch-predict-4 | |
taskInfo: | |
name: write-bp-result-path-2 | |
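# The tasks above form the distillation data path: read-input-uri / read-input-uri-2
# resolve the train and eval split URIs, model-batch-predict-3 / -4 run the stage-2
# ensemble over those splits in tf-record format to produce teacher predictions,
# write-bp-result-path / write-bp-result-path-2 record the prediction output
# locations, automl-tabular-transform-2 re-materializes the splits from those
# predictions, and automl-tabular-stage-1-tuner-2 then trains the distilled
# (student) model with run_distillation enabled and a single selected trial.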
inputDefinitions: | |
artifacts: | |
pipelinechannel--automl-tabular-ensemble-2-unmanaged_container_model: | |
artifactType: | |
schemaTitle: google.UnmanagedContainerModel | |
schemaVersion: 0.0.1 | |
pipelinechannel--tabular-stats-and-example-gen-dataset_schema: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
pipelinechannel--tabular-stats-and-example-gen-eval_split: | |
artifactType: | |
schemaTitle: system.Dataset | |
schemaVersion: 0.0.1 | |
pipelinechannel--tabular-stats-and-example-gen-instance_baseline: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
pipelinechannel--tabular-stats-and-example-gen-metadata: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
pipelinechannel--tabular-stats-and-example-gen-test_split: | |
artifactType: | |
schemaTitle: system.Dataset | |
schemaVersion: 0.0.1 | |
pipelinechannel--tabular-stats-and-example-gen-train_split: | |
artifactType: | |
schemaTitle: system.Dataset | |
schemaVersion: 0.0.1 | |
parameters: | |
pipelinechannel--bool-identity-2-Output: | |
parameterType: STRING | |
pipelinechannel--bool-identity-3-Output: | |
parameterType: STRING | |
pipelinechannel--calculate-training-parameters-2-distill_stage_1_deadline_hours: | |
parameterType: NUMBER_DOUBLE | |
pipelinechannel--calculate-training-parameters-2-reduce_search_space_mode: | |
parameterType: STRING | |
pipelinechannel--calculate-training-parameters-2-stage_1_single_run_max_secs: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--dataflow_service_account: | |
parameterType: STRING | |
pipelinechannel--dataflow_subnetwork: | |
parameterType: STRING | |
pipelinechannel--dataflow_use_public_ips: | |
parameterType: BOOLEAN | |
pipelinechannel--disable_early_stopping: | |
parameterType: BOOLEAN | |
pipelinechannel--distill_batch_predict_machine_type: | |
parameterType: STRING | |
pipelinechannel--distill_batch_predict_max_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--distill_batch_predict_starting_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--encryption_spec_key_name: | |
parameterType: STRING | |
pipelinechannel--evaluation_batch_explain_machine_type: | |
parameterType: STRING | |
pipelinechannel--evaluation_batch_explain_max_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_batch_explain_starting_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_batch_predict_machine_type: | |
parameterType: STRING | |
pipelinechannel--evaluation_batch_predict_max_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_batch_predict_starting_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_dataflow_disk_size_gb: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_dataflow_machine_type: | |
parameterType: STRING | |
pipelinechannel--evaluation_dataflow_max_num_workers: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_dataflow_starting_num_workers: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--export_additional_model_without_custom_ops: | |
parameterType: BOOLEAN | |
pipelinechannel--location: | |
parameterType: STRING | |
pipelinechannel--prediction_type: | |
parameterType: STRING | |
pipelinechannel--project: | |
parameterType: STRING | |
pipelinechannel--root_dir: | |
parameterType: STRING | |
pipelinechannel--stage_1_num_parallel_trials: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--stage_1_tuner_worker_pool_specs_override: | |
parameterType: LIST | |
pipelinechannel--string-not-empty-Output: | |
parameterType: STRING | |
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: | |
parameterType: LIST | |
pipelinechannel--tabular-stats-and-example-gen-test_split_json: | |
parameterType: LIST | |
pipelinechannel--target_column: | |
parameterType: STRING | |
pipelinechannel--transform_dataflow_disk_size_gb: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--transform_dataflow_machine_type: | |
parameterType: STRING | |
pipelinechannel--transform_dataflow_max_num_workers: | |
parameterType: NUMBER_INTEGER | |
outputDefinitions: | |
artifacts: | |
feature-attribution-3-feature_attributions: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
model-evaluation-3-evaluation_metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
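# comp-condition-8 below is the evaluation sub-DAG for the distilled model: it
# batch-explains and batch-predicts over the test split, runs feature attribution
# and model evaluation over the results, and imports both into the model uploaded
# by model-upload-3. It is only instantiated inside the distillation branch and is
# additionally gated by the evaluation flag (see the triggerPolicy above).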
comp-condition-8: | |
dag: | |
outputs: | |
artifacts: | |
feature-attribution-3-feature_attributions: | |
artifactSelectors: | |
- outputArtifactKey: feature_attributions | |
producerSubtask: feature-attribution-3 | |
model-evaluation-3-evaluation_metrics: | |
artifactSelectors: | |
- outputArtifactKey: evaluation_metrics | |
producerSubtask: model-evaluation-3 | |
tasks: | |
feature-attribution-3: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-feature-attribution-3 | |
dependentTasks: | |
- model-batch-explanation-3 | |
inputs: | |
artifacts: | |
predictions_gcs_source: | |
taskOutputArtifact: | |
outputArtifactKey: gcs_output_directory | |
producerTask: model-batch-explanation-3 | |
parameters: | |
dataflow_disk_size: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb | |
dataflow_machine_type: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type | |
dataflow_max_workers_num: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers | |
dataflow_service_account: | |
componentInputParameter: pipelinechannel--dataflow_service_account | |
dataflow_subnetwork: | |
componentInputParameter: pipelinechannel--dataflow_subnetwork | |
dataflow_use_public_ips: | |
componentInputParameter: pipelinechannel--dataflow_use_public_ips | |
dataflow_workers_num: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers | |
encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
location: | |
componentInputParameter: pipelinechannel--location | |
predictions_format: | |
runtimeValue: | |
constant: jsonl | |
project: | |
componentInputParameter: pipelinechannel--project | |
taskInfo: | |
name: feature-attribution-3 | |
model-batch-explanation-3: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-model-batch-explanation-3 | |
inputs: | |
artifacts: | |
explanation_metadata_artifact: | |
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-3-explanation_metadata_artifact | |
unmanaged_container_model: | |
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model | |
parameters: | |
encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
explanation_parameters: | |
componentInputParameter: pipelinechannel--automl-tabular-ensemble-3-explanation_parameters | |
gcs_destination_output_uri_prefix: | |
componentInputParameter: pipelinechannel--root_dir | |
gcs_source_uris: | |
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json | |
generate_explanation: | |
runtimeValue: | |
constant: 1.0 | |
instances_format: | |
runtimeValue: | |
constant: tf-record | |
job_display_name: | |
runtimeValue: | |
constant: batch-explain-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} | |
location: | |
componentInputParameter: pipelinechannel--location | |
machine_type: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type | |
max_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count | |
predictions_format: | |
runtimeValue: | |
constant: jsonl | |
project: | |
componentInputParameter: pipelinechannel--project | |
starting_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count | |
taskInfo: | |
name: model-batch-explanation-3 | |
model-batch-predict-5: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-model-batch-predict-5 | |
inputs: | |
artifacts: | |
unmanaged_container_model: | |
componentInputArtifact: pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model | |
parameters: | |
encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
gcs_destination_output_uri_prefix: | |
componentInputParameter: pipelinechannel--root_dir | |
gcs_source_uris: | |
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json | |
instances_format: | |
runtimeValue: | |
constant: tf-record | |
job_display_name: | |
runtimeValue: | |
constant: batch-predict-evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} | |
location: | |
componentInputParameter: pipelinechannel--location | |
machine_type: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type | |
max_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count | |
predictions_format: | |
runtimeValue: | |
constant: jsonl | |
project: | |
componentInputParameter: pipelinechannel--project | |
starting_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count | |
taskInfo: | |
name: model-batch-predict-5 | |
model-evaluation-3: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-model-evaluation-3 | |
dependentTasks: | |
- model-batch-predict-5 | |
inputs: | |
artifacts: | |
batch_prediction_job: | |
taskOutputArtifact: | |
outputArtifactKey: batchpredictionjob | |
producerTask: model-batch-predict-5 | |
parameters: | |
dataflow_disk_size: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb | |
dataflow_machine_type: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type | |
dataflow_max_workers_num: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers | |
dataflow_service_account: | |
componentInputParameter: pipelinechannel--dataflow_service_account | |
dataflow_subnetwork: | |
componentInputParameter: pipelinechannel--dataflow_subnetwork | |
dataflow_use_public_ips: | |
componentInputParameter: pipelinechannel--dataflow_use_public_ips | |
dataflow_workers_num: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers | |
encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
ground_truth_column: | |
componentInputParameter: pipelinechannel--target_column | |
ground_truth_format: | |
runtimeValue: | |
constant: jsonl | |
location: | |
componentInputParameter: pipelinechannel--location | |
prediction_label_column: | |
runtimeValue: | |
constant: '' | |
prediction_score_column: | |
runtimeValue: | |
constant: '' | |
predictions_format: | |
runtimeValue: | |
constant: jsonl | |
problem_type: | |
componentInputParameter: pipelinechannel--prediction_type | |
project: | |
componentInputParameter: pipelinechannel--project | |
root_dir: | |
componentInputParameter: pipelinechannel--root_dir | |
taskInfo: | |
name: model-evaluation-3 | |
model-evaluation-import-3: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-model-evaluation-import-3 | |
dependentTasks: | |
- feature-attribution-3 | |
- model-evaluation-3 | |
inputs: | |
artifacts: | |
feature_attributions: | |
taskOutputArtifact: | |
outputArtifactKey: feature_attributions | |
producerTask: feature-attribution-3 | |
metrics: | |
taskOutputArtifact: | |
outputArtifactKey: evaluation_metrics | |
producerTask: model-evaluation-3 | |
model: | |
componentInputArtifact: pipelinechannel--model-upload-3-model | |
parameters: | |
dataset_paths: | |
componentInputParameter: pipelinechannel--tabular-stats-and-example-gen-test_split_json | |
dataset_type: | |
runtimeValue: | |
constant: tf-record | |
display_name: | |
runtimeValue: | |
constant: AutoML Tabular | |
problem_type: | |
componentInputParameter: pipelinechannel--prediction_type | |
taskInfo: | |
name: model-evaluation-import-3 | |
inputDefinitions: | |
artifacts: | |
pipelinechannel--automl-tabular-ensemble-3-explanation_metadata_artifact: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
pipelinechannel--automl-tabular-ensemble-3-unmanaged_container_model: | |
artifactType: | |
schemaTitle: google.UnmanagedContainerModel | |
schemaVersion: 0.0.1 | |
pipelinechannel--model-upload-3-model: | |
artifactType: | |
schemaTitle: google.VertexModel | |
schemaVersion: 0.0.1 | |
parameters: | |
pipelinechannel--automl-tabular-ensemble-3-explanation_parameters: | |
parameterType: STRUCT | |
pipelinechannel--bool-identity-2-Output: | |
parameterType: STRING | |
pipelinechannel--bool-identity-3-Output: | |
parameterType: STRING | |
pipelinechannel--dataflow_service_account: | |
parameterType: STRING | |
pipelinechannel--dataflow_subnetwork: | |
parameterType: STRING | |
pipelinechannel--dataflow_use_public_ips: | |
parameterType: BOOLEAN | |
pipelinechannel--encryption_spec_key_name: | |
parameterType: STRING | |
pipelinechannel--evaluation_batch_explain_machine_type: | |
parameterType: STRING | |
pipelinechannel--evaluation_batch_explain_max_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_batch_explain_starting_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_batch_predict_machine_type: | |
parameterType: STRING | |
pipelinechannel--evaluation_batch_predict_max_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_batch_predict_starting_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_dataflow_disk_size_gb: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_dataflow_machine_type: | |
parameterType: STRING | |
pipelinechannel--evaluation_dataflow_max_num_workers: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_dataflow_starting_num_workers: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--location: | |
parameterType: STRING | |
pipelinechannel--prediction_type: | |
parameterType: STRING | |
pipelinechannel--project: | |
parameterType: STRING | |
pipelinechannel--root_dir: | |
parameterType: STRING | |
pipelinechannel--string-not-empty-Output: | |
parameterType: STRING | |
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: | |
parameterType: LIST | |
pipelinechannel--tabular-stats-and-example-gen-test_split_json: | |
parameterType: LIST | |
pipelinechannel--target_column: | |
parameterType: STRING | |
outputDefinitions: | |
artifacts: | |
feature-attribution-3-feature_attributions: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
model-evaluation-3-evaluation_metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
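# comp-exit-handler-1 below is the body of the root DAG, wrapped in an exit handler
# (dsl.ExitHandler in the KFP SDK) so an exit task can run after the DAG completes
# regardless of outcome. Its tasks run tabular-stats-and-example-gen, the transform
# step, merge-materialized-splits, and then branch on string-not-empty: condition-2
# when a stage_1_tuning_result_artifact_uri is supplied, condition-4 when the
# stage-1 tuner must be run from scratch. A minimal, hedged sketch of submitting the
# compiled spec with the Vertex AI SDK -- bucket name and parameter values are
# placeholders, not taken from this file:
#
#   from google.cloud import aiplatform
#
#   aiplatform.init(project="my-project", location="us-central1")
#   job = aiplatform.PipelineJob(
#       display_name="automl-tabular",
#       template_path="automl-tabular.yaml",
#       pipeline_root="gs://my-bucket/pipeline-root",
#       parameter_values={"project": "my-project", "location": "us-central1"},
#   )
#   job.run()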
comp-exit-handler-1: | |
dag: | |
outputs: | |
artifacts: | |
feature-attribution-2-feature_attributions: | |
artifactSelectors: | |
- outputArtifactKey: feature-attribution-2-feature_attributions | |
producerSubtask: condition-4 | |
feature-attribution-3-feature_attributions: | |
artifactSelectors: | |
- outputArtifactKey: feature-attribution-3-feature_attributions | |
producerSubtask: condition-4 | |
feature-attribution-feature_attributions: | |
artifactSelectors: | |
- outputArtifactKey: feature-attribution-feature_attributions | |
producerSubtask: condition-2 | |
model-evaluation-2-evaluation_metrics: | |
artifactSelectors: | |
- outputArtifactKey: model-evaluation-2-evaluation_metrics | |
producerSubtask: condition-4 | |
model-evaluation-3-evaluation_metrics: | |
artifactSelectors: | |
- outputArtifactKey: model-evaluation-3-evaluation_metrics | |
producerSubtask: condition-4 | |
model-evaluation-evaluation_metrics: | |
artifactSelectors: | |
- outputArtifactKey: model-evaluation-evaluation_metrics | |
producerSubtask: condition-2 | |
tasks: | |
automl-tabular-transform: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-automl-tabular-transform | |
dependentTasks: | |
- tabular-stats-and-example-gen | |
inputs: | |
artifacts: | |
dataset_schema: | |
taskOutputArtifact: | |
outputArtifactKey: dataset_schema | |
producerTask: tabular-stats-and-example-gen | |
eval_split: | |
taskOutputArtifact: | |
outputArtifactKey: eval_split | |
producerTask: tabular-stats-and-example-gen | |
metadata: | |
taskOutputArtifact: | |
outputArtifactKey: metadata | |
producerTask: tabular-stats-and-example-gen | |
test_split: | |
taskOutputArtifact: | |
outputArtifactKey: test_split | |
producerTask: tabular-stats-and-example-gen | |
train_split: | |
taskOutputArtifact: | |
outputArtifactKey: train_split | |
producerTask: tabular-stats-and-example-gen | |
parameters: | |
dataflow_disk_size_gb: | |
componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb | |
dataflow_machine_type: | |
componentInputParameter: pipelinechannel--transform_dataflow_machine_type | |
dataflow_max_num_workers: | |
componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers | |
dataflow_service_account: | |
componentInputParameter: pipelinechannel--dataflow_service_account | |
dataflow_subnetwork: | |
componentInputParameter: pipelinechannel--dataflow_subnetwork | |
dataflow_use_public_ips: | |
componentInputParameter: pipelinechannel--dataflow_use_public_ips | |
encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
location: | |
componentInputParameter: pipelinechannel--location | |
project: | |
componentInputParameter: pipelinechannel--project | |
root_dir: | |
componentInputParameter: pipelinechannel--root_dir | |
taskInfo: | |
name: automl-tabular-transform | |
condition-2: | |
componentRef: | |
name: comp-condition-2 | |
dependentTasks: | |
- automl-tabular-transform | |
- merge-materialized-splits | |
- string-not-empty | |
- tabular-stats-and-example-gen | |
inputs: | |
artifacts: | |
pipelinechannel--automl-tabular-transform-transform_output: | |
taskOutputArtifact: | |
outputArtifactKey: transform_output | |
producerTask: automl-tabular-transform | |
pipelinechannel--merge-materialized-splits-splits: | |
taskOutputArtifact: | |
outputArtifactKey: splits | |
producerTask: merge-materialized-splits | |
pipelinechannel--parent_model: | |
componentInputArtifact: pipelinechannel--parent_model | |
pipelinechannel--tabular-stats-and-example-gen-dataset_schema: | |
taskOutputArtifact: | |
outputArtifactKey: dataset_schema | |
producerTask: tabular-stats-and-example-gen | |
pipelinechannel--tabular-stats-and-example-gen-eval_split: | |
taskOutputArtifact: | |
outputArtifactKey: eval_split | |
producerTask: tabular-stats-and-example-gen | |
pipelinechannel--tabular-stats-and-example-gen-instance_baseline: | |
taskOutputArtifact: | |
outputArtifactKey: instance_baseline | |
producerTask: tabular-stats-and-example-gen | |
pipelinechannel--tabular-stats-and-example-gen-metadata: | |
taskOutputArtifact: | |
outputArtifactKey: metadata | |
producerTask: tabular-stats-and-example-gen | |
parameters: | |
pipelinechannel--cv_trainer_worker_pool_specs_override: | |
componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override | |
pipelinechannel--dataflow_service_account: | |
componentInputParameter: pipelinechannel--dataflow_service_account | |
pipelinechannel--dataflow_subnetwork: | |
componentInputParameter: pipelinechannel--dataflow_subnetwork | |
pipelinechannel--dataflow_use_public_ips: | |
componentInputParameter: pipelinechannel--dataflow_use_public_ips | |
pipelinechannel--encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
pipelinechannel--evaluation_batch_explain_machine_type: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type | |
pipelinechannel--evaluation_batch_explain_max_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count | |
pipelinechannel--evaluation_batch_explain_starting_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count | |
pipelinechannel--evaluation_batch_predict_machine_type: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type | |
pipelinechannel--evaluation_batch_predict_max_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count | |
pipelinechannel--evaluation_batch_predict_starting_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count | |
pipelinechannel--evaluation_dataflow_disk_size_gb: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb | |
pipelinechannel--evaluation_dataflow_machine_type: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type | |
pipelinechannel--evaluation_dataflow_max_num_workers: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers | |
pipelinechannel--evaluation_dataflow_starting_num_workers: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers | |
pipelinechannel--export_additional_model_without_custom_ops: | |
componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops | |
pipelinechannel--fast_testing: | |
componentInputParameter: pipelinechannel--fast_testing | |
pipelinechannel--location: | |
componentInputParameter: pipelinechannel--location | |
pipelinechannel--model_description: | |
componentInputParameter: pipelinechannel--model_description | |
pipelinechannel--prediction_type: | |
componentInputParameter: pipelinechannel--prediction_type | |
pipelinechannel--project: | |
componentInputParameter: pipelinechannel--project | |
pipelinechannel--root_dir: | |
componentInputParameter: pipelinechannel--root_dir | |
pipelinechannel--run_distillation: | |
componentInputParameter: pipelinechannel--run_distillation | |
pipelinechannel--run_evaluation: | |
componentInputParameter: pipelinechannel--run_evaluation | |
pipelinechannel--set-optional-inputs-model_display_name: | |
componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name | |
pipelinechannel--stage_1_num_parallel_trials: | |
componentInputParameter: pipelinechannel--stage_1_num_parallel_trials | |
pipelinechannel--stage_1_tuning_result_artifact_uri: | |
componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri | |
pipelinechannel--stage_2_num_parallel_trials: | |
componentInputParameter: pipelinechannel--stage_2_num_parallel_trials | |
pipelinechannel--stage_2_num_selected_trials: | |
componentInputParameter: pipelinechannel--stage_2_num_selected_trials | |
pipelinechannel--string-not-empty-Output: | |
taskOutputParameter: | |
outputParameterKey: Output | |
producerTask: string-not-empty | |
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: | |
taskOutputParameter: | |
outputParameterKey: downsampled_test_split_json | |
producerTask: tabular-stats-and-example-gen | |
pipelinechannel--tabular-stats-and-example-gen-test_split_json: | |
taskOutputParameter: | |
outputParameterKey: test_split_json | |
producerTask: tabular-stats-and-example-gen | |
pipelinechannel--target_column: | |
componentInputParameter: pipelinechannel--target_column | |
pipelinechannel--train_budget_milli_node_hours: | |
componentInputParameter: pipelinechannel--train_budget_milli_node_hours | |
taskInfo: | |
name: stage_1_tuning_result_artifact_uri_not_empty | |
triggerPolicy: | |
condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] | |
== 'true' | |
condition-4: | |
componentRef: | |
name: comp-condition-4 | |
dependentTasks: | |
- automl-tabular-transform | |
- merge-materialized-splits | |
- string-not-empty | |
- tabular-stats-and-example-gen | |
inputs: | |
artifacts: | |
pipelinechannel--automl-tabular-transform-materialized_eval_split: | |
taskOutputArtifact: | |
outputArtifactKey: materialized_eval_split | |
producerTask: automl-tabular-transform | |
pipelinechannel--automl-tabular-transform-materialized_train_split: | |
taskOutputArtifact: | |
outputArtifactKey: materialized_train_split | |
producerTask: automl-tabular-transform | |
pipelinechannel--automl-tabular-transform-transform_output: | |
taskOutputArtifact: | |
outputArtifactKey: transform_output | |
producerTask: automl-tabular-transform | |
pipelinechannel--merge-materialized-splits-splits: | |
taskOutputArtifact: | |
outputArtifactKey: splits | |
producerTask: merge-materialized-splits | |
pipelinechannel--parent_model: | |
componentInputArtifact: pipelinechannel--parent_model | |
pipelinechannel--tabular-stats-and-example-gen-dataset_schema: | |
taskOutputArtifact: | |
outputArtifactKey: dataset_schema | |
producerTask: tabular-stats-and-example-gen | |
pipelinechannel--tabular-stats-and-example-gen-eval_split: | |
taskOutputArtifact: | |
outputArtifactKey: eval_split | |
producerTask: tabular-stats-and-example-gen | |
pipelinechannel--tabular-stats-and-example-gen-instance_baseline: | |
taskOutputArtifact: | |
outputArtifactKey: instance_baseline | |
producerTask: tabular-stats-and-example-gen | |
pipelinechannel--tabular-stats-and-example-gen-metadata: | |
taskOutputArtifact: | |
outputArtifactKey: metadata | |
producerTask: tabular-stats-and-example-gen | |
pipelinechannel--tabular-stats-and-example-gen-test_split: | |
taskOutputArtifact: | |
outputArtifactKey: test_split | |
producerTask: tabular-stats-and-example-gen | |
pipelinechannel--tabular-stats-and-example-gen-train_split: | |
taskOutputArtifact: | |
outputArtifactKey: train_split | |
producerTask: tabular-stats-and-example-gen | |
parameters: | |
pipelinechannel--cv_trainer_worker_pool_specs_override: | |
componentInputParameter: pipelinechannel--cv_trainer_worker_pool_specs_override | |
pipelinechannel--dataflow_service_account: | |
componentInputParameter: pipelinechannel--dataflow_service_account | |
pipelinechannel--dataflow_subnetwork: | |
componentInputParameter: pipelinechannel--dataflow_subnetwork | |
pipelinechannel--dataflow_use_public_ips: | |
componentInputParameter: pipelinechannel--dataflow_use_public_ips | |
pipelinechannel--disable_early_stopping: | |
componentInputParameter: pipelinechannel--disable_early_stopping | |
pipelinechannel--distill_batch_predict_machine_type: | |
componentInputParameter: pipelinechannel--distill_batch_predict_machine_type | |
pipelinechannel--distill_batch_predict_max_replica_count: | |
componentInputParameter: pipelinechannel--distill_batch_predict_max_replica_count | |
pipelinechannel--distill_batch_predict_starting_replica_count: | |
componentInputParameter: pipelinechannel--distill_batch_predict_starting_replica_count | |
pipelinechannel--encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
pipelinechannel--evaluation_batch_explain_machine_type: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_machine_type | |
pipelinechannel--evaluation_batch_explain_max_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_max_replica_count | |
pipelinechannel--evaluation_batch_explain_starting_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_explain_starting_replica_count | |
pipelinechannel--evaluation_batch_predict_machine_type: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_machine_type | |
pipelinechannel--evaluation_batch_predict_max_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_max_replica_count | |
pipelinechannel--evaluation_batch_predict_starting_replica_count: | |
componentInputParameter: pipelinechannel--evaluation_batch_predict_starting_replica_count | |
pipelinechannel--evaluation_dataflow_disk_size_gb: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_disk_size_gb | |
pipelinechannel--evaluation_dataflow_machine_type: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_machine_type | |
pipelinechannel--evaluation_dataflow_max_num_workers: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_max_num_workers | |
pipelinechannel--evaluation_dataflow_starting_num_workers: | |
componentInputParameter: pipelinechannel--evaluation_dataflow_starting_num_workers | |
pipelinechannel--export_additional_model_without_custom_ops: | |
componentInputParameter: pipelinechannel--export_additional_model_without_custom_ops | |
pipelinechannel--fast_testing: | |
componentInputParameter: pipelinechannel--fast_testing | |
pipelinechannel--location: | |
componentInputParameter: pipelinechannel--location | |
pipelinechannel--model_description: | |
componentInputParameter: pipelinechannel--model_description | |
pipelinechannel--prediction_type: | |
componentInputParameter: pipelinechannel--prediction_type | |
pipelinechannel--project: | |
componentInputParameter: pipelinechannel--project | |
pipelinechannel--root_dir: | |
componentInputParameter: pipelinechannel--root_dir | |
pipelinechannel--run_distillation: | |
componentInputParameter: pipelinechannel--run_distillation | |
pipelinechannel--run_evaluation: | |
componentInputParameter: pipelinechannel--run_evaluation | |
pipelinechannel--set-optional-inputs-model_display_name: | |
componentInputParameter: pipelinechannel--set-optional-inputs-model_display_name | |
pipelinechannel--stage_1_num_parallel_trials: | |
componentInputParameter: pipelinechannel--stage_1_num_parallel_trials | |
pipelinechannel--stage_1_tuner_worker_pool_specs_override: | |
componentInputParameter: pipelinechannel--stage_1_tuner_worker_pool_specs_override | |
pipelinechannel--stage_2_num_parallel_trials: | |
componentInputParameter: pipelinechannel--stage_2_num_parallel_trials | |
pipelinechannel--stage_2_num_selected_trials: | |
componentInputParameter: pipelinechannel--stage_2_num_selected_trials | |
pipelinechannel--string-not-empty-Output: | |
taskOutputParameter: | |
outputParameterKey: Output | |
producerTask: string-not-empty | |
pipelinechannel--study_spec_parameters_override: | |
componentInputParameter: pipelinechannel--study_spec_parameters_override | |
pipelinechannel--tabular-stats-and-example-gen-downsampled_test_split_json: | |
taskOutputParameter: | |
outputParameterKey: downsampled_test_split_json | |
producerTask: tabular-stats-and-example-gen | |
pipelinechannel--tabular-stats-and-example-gen-test_split_json: | |
taskOutputParameter: | |
outputParameterKey: test_split_json | |
producerTask: tabular-stats-and-example-gen | |
pipelinechannel--target_column: | |
componentInputParameter: pipelinechannel--target_column | |
pipelinechannel--train_budget_milli_node_hours: | |
componentInputParameter: pipelinechannel--train_budget_milli_node_hours | |
pipelinechannel--transform_dataflow_disk_size_gb: | |
componentInputParameter: pipelinechannel--transform_dataflow_disk_size_gb | |
pipelinechannel--transform_dataflow_machine_type: | |
componentInputParameter: pipelinechannel--transform_dataflow_machine_type | |
pipelinechannel--transform_dataflow_max_num_workers: | |
componentInputParameter: pipelinechannel--transform_dataflow_max_num_workers | |
taskInfo: | |
name: stage_1_tuning_result_artifact_uri_empty | |
triggerPolicy: | |
condition: inputs.parameter_values['pipelinechannel--string-not-empty-Output'] | |
== 'false' | |
merge-materialized-splits: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-merge-materialized-splits | |
dependentTasks: | |
- automl-tabular-transform | |
inputs: | |
artifacts: | |
split_0: | |
taskOutputArtifact: | |
outputArtifactKey: materialized_train_split | |
producerTask: automl-tabular-transform | |
split_1: | |
taskOutputArtifact: | |
outputArtifactKey: materialized_eval_split | |
producerTask: automl-tabular-transform | |
taskInfo: | |
name: merge-materialized-splits | |
string-not-empty: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-string-not-empty | |
inputs: | |
parameters: | |
value: | |
componentInputParameter: pipelinechannel--stage_1_tuning_result_artifact_uri | |
taskInfo: | |
name: string-not-empty | |
tabular-stats-and-example-gen: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-tabular-stats-and-example-gen | |
inputs: | |
parameters: | |
additional_experiments_json: | |
componentInputParameter: pipelinechannel--additional_experiments | |
data_source_bigquery_table_path: | |
componentInputParameter: pipelinechannel--set-optional-inputs-data_source_bigquery_table_path | |
data_source_csv_filenames: | |
componentInputParameter: pipelinechannel--set-optional-inputs-data_source_csv_filenames | |
dataflow_disk_size_gb: | |
componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_disk_size_gb | |
dataflow_machine_type: | |
componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_machine_type | |
dataflow_max_num_workers: | |
componentInputParameter: pipelinechannel--stats_and_example_gen_dataflow_max_num_workers | |
dataflow_service_account: | |
componentInputParameter: pipelinechannel--dataflow_service_account | |
dataflow_subnetwork: | |
componentInputParameter: pipelinechannel--dataflow_subnetwork | |
dataflow_use_public_ips: | |
componentInputParameter: pipelinechannel--dataflow_use_public_ips | |
enable_probabilistic_inference: | |
componentInputParameter: pipelinechannel--enable_probabilistic_inference | |
encryption_spec_key_name: | |
componentInputParameter: pipelinechannel--encryption_spec_key_name | |
location: | |
componentInputParameter: pipelinechannel--location | |
optimization_objective: | |
componentInputParameter: pipelinechannel--optimization_objective | |
optimization_objective_precision_value: | |
componentInputParameter: pipelinechannel--optimization_objective_precision_value | |
optimization_objective_recall_value: | |
componentInputParameter: pipelinechannel--optimization_objective_recall_value | |
predefined_split_key: | |
componentInputParameter: pipelinechannel--predefined_split_key | |
prediction_type: | |
componentInputParameter: pipelinechannel--prediction_type | |
project: | |
componentInputParameter: pipelinechannel--project | |
quantiles: | |
componentInputParameter: pipelinechannel--quantiles | |
root_dir: | |
componentInputParameter: pipelinechannel--root_dir | |
run_distillation: | |
componentInputParameter: pipelinechannel--run_distillation | |
stratified_split_key: | |
componentInputParameter: pipelinechannel--stratified_split_key | |
target_column_name: | |
componentInputParameter: pipelinechannel--target_column | |
test_fraction: | |
componentInputParameter: pipelinechannel--test_fraction | |
timestamp_split_key: | |
componentInputParameter: pipelinechannel--timestamp_split_key | |
training_fraction: | |
componentInputParameter: pipelinechannel--training_fraction | |
transformations: | |
runtimeValue: | |
constant: '[]' | |
transformations_path: | |
componentInputParameter: pipelinechannel--transformations | |
validation_fraction: | |
componentInputParameter: pipelinechannel--validation_fraction | |
weight_column_name: | |
componentInputParameter: pipelinechannel--weight_column | |
taskInfo: | |
name: tabular-stats-and-example-gen | |
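# In the tabular-stats-and-example-gen task above, the legacy `transformations`
# parameter is pinned to the constant '[]' and the actual transformation spec is
# passed through `transformations_path`, wired to the pipeline-level
# `transformations` channel. The task also maps the pipeline's `target_column` and
# `weight_column` inputs onto the component's `target_column_name` and
# `weight_column_name` parameters.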
inputDefinitions: | |
artifacts: | |
pipelinechannel--parent_model: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
parameters: | |
pipelinechannel--additional_experiments: | |
parameterType: STRUCT | |
pipelinechannel--cv_trainer_worker_pool_specs_override: | |
parameterType: LIST | |
pipelinechannel--dataflow_service_account: | |
parameterType: STRING | |
pipelinechannel--dataflow_subnetwork: | |
parameterType: STRING | |
pipelinechannel--dataflow_use_public_ips: | |
parameterType: BOOLEAN | |
pipelinechannel--disable_early_stopping: | |
parameterType: BOOLEAN | |
pipelinechannel--distill_batch_predict_machine_type: | |
parameterType: STRING | |
pipelinechannel--distill_batch_predict_max_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--distill_batch_predict_starting_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--enable_probabilistic_inference: | |
parameterType: BOOLEAN | |
pipelinechannel--encryption_spec_key_name: | |
parameterType: STRING | |
pipelinechannel--evaluation_batch_explain_machine_type: | |
parameterType: STRING | |
pipelinechannel--evaluation_batch_explain_max_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_batch_explain_starting_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_batch_predict_machine_type: | |
parameterType: STRING | |
pipelinechannel--evaluation_batch_predict_max_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_batch_predict_starting_replica_count: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_dataflow_disk_size_gb: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_dataflow_machine_type: | |
parameterType: STRING | |
pipelinechannel--evaluation_dataflow_max_num_workers: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--evaluation_dataflow_starting_num_workers: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--export_additional_model_without_custom_ops: | |
parameterType: BOOLEAN | |
pipelinechannel--fast_testing: | |
parameterType: BOOLEAN | |
pipelinechannel--location: | |
parameterType: STRING | |
pipelinechannel--model_description: | |
parameterType: STRING | |
pipelinechannel--optimization_objective: | |
parameterType: STRING | |
pipelinechannel--optimization_objective_precision_value: | |
parameterType: NUMBER_DOUBLE | |
pipelinechannel--optimization_objective_recall_value: | |
parameterType: NUMBER_DOUBLE | |
pipelinechannel--predefined_split_key: | |
parameterType: STRING | |
pipelinechannel--prediction_type: | |
parameterType: STRING | |
pipelinechannel--project: | |
parameterType: STRING | |
pipelinechannel--quantiles: | |
parameterType: LIST | |
pipelinechannel--root_dir: | |
parameterType: STRING | |
pipelinechannel--run_distillation: | |
parameterType: BOOLEAN | |
pipelinechannel--run_evaluation: | |
parameterType: BOOLEAN | |
pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: | |
parameterType: STRING | |
pipelinechannel--set-optional-inputs-data_source_csv_filenames: | |
parameterType: STRING | |
pipelinechannel--set-optional-inputs-model_display_name: | |
parameterType: STRING | |
pipelinechannel--stage_1_num_parallel_trials: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--stage_1_tuner_worker_pool_specs_override: | |
parameterType: LIST | |
pipelinechannel--stage_1_tuning_result_artifact_uri: | |
parameterType: STRING | |
pipelinechannel--stage_2_num_parallel_trials: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--stage_2_num_selected_trials: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--stats_and_example_gen_dataflow_disk_size_gb: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--stats_and_example_gen_dataflow_machine_type: | |
parameterType: STRING | |
pipelinechannel--stats_and_example_gen_dataflow_max_num_workers: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--stratified_split_key: | |
parameterType: STRING | |
pipelinechannel--study_spec_parameters_override: | |
parameterType: LIST | |
pipelinechannel--target_column: | |
parameterType: STRING | |
pipelinechannel--test_fraction: | |
parameterType: NUMBER_DOUBLE | |
pipelinechannel--timestamp_split_key: | |
parameterType: STRING | |
pipelinechannel--train_budget_milli_node_hours: | |
parameterType: NUMBER_DOUBLE | |
pipelinechannel--training_fraction: | |
parameterType: NUMBER_DOUBLE | |
pipelinechannel--transform_dataflow_disk_size_gb: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--transform_dataflow_machine_type: | |
parameterType: STRING | |
pipelinechannel--transform_dataflow_max_num_workers: | |
parameterType: NUMBER_INTEGER | |
pipelinechannel--transformations: | |
parameterType: STRING | |
pipelinechannel--validation_fraction: | |
parameterType: NUMBER_DOUBLE | |
pipelinechannel--weight_column: | |
parameterType: STRING | |
outputDefinitions: | |
artifacts: | |
feature-attribution-2-feature_attributions: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
feature-attribution-3-feature_attributions: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
feature-attribution-feature_attributions: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
model-evaluation-2-evaluation_metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
model-evaluation-3-evaluation_metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
model-evaluation-evaluation_metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
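# comp-feature-attribution, comp-feature-attribution-2 and comp-feature-attribution-3
# below are three copies of the same feature-attribution component, one per
# evaluation branch (each instantiation in the pipeline gets its own numbered
# component entry). Each reads batch explanation output from GCS or BigQuery and
# aggregates per-instance feature attributions into a system.Metrics artifact,
# executed on Dataflow (or on the Beam DirectRunner when force_direct_runner is set).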
comp-feature-attribution: | |
executorLabel: exec-feature-attribution | |
inputDefinitions: | |
artifacts: | |
predictions_bigquery_source: | |
artifactType: | |
schemaTitle: google.BQTable | |
schemaVersion: 0.0.1 | |
description: 'BigQuery table | |
with prediction or explanation data to be used for this evaluation. For | |
prediction results, the table column should be named "predicted_*".' | |
isOptional: true | |
predictions_gcs_source: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: 'An artifact with its | |
URI pointing toward a GCS directory with prediction or explanation files | |
to be used for this evaluation. For prediction results, the files should | |
be named "prediction.results-*" or "predictions_". For explanation | |
results, the files should be named "explanation.results-*".' | |
isOptional: true | |
parameters: | |
dataflow_disk_size: | |
defaultValue: 50.0 | |
description: 'The disk size (in GB) of the machine | |
executing the evaluation run. If not set, defaulted to `50`.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
dataflow_machine_type: | |
defaultValue: n1-standard-4 | |
description: 'The machine type executing the | |
evaluation run. If not set, defaulted to `n1-standard-4`.' | |
isOptional: true | |
parameterType: STRING | |
dataflow_max_workers_num: | |
defaultValue: 5.0 | |
description: 'The max number of workers | |
            executing the evaluation run. If not set, defaulted to `5`.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
dataflow_service_account: | |
defaultValue: '' | |
description: 'Service account to run the | |
dataflow job. If not set, dataflow will use the default worker service | |
account. For more details, see | |
https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' | |
isOptional: true | |
parameterType: STRING | |
dataflow_subnetwork: | |
defaultValue: '' | |
description: 'Dataflow''s fully qualified subnetwork | |
name, when empty the default subnetwork will be used. More details: | |
https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' | |
isOptional: true | |
parameterType: STRING | |
dataflow_use_public_ips: | |
defaultValue: true | |
description: 'Specifies whether Dataflow | |
workers use public IP addresses.' | |
isOptional: true | |
parameterType: BOOLEAN | |
dataflow_workers_num: | |
defaultValue: 1.0 | |
description: 'The number of workers executing the | |
            evaluation run. If not set, defaulted to `1`.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
encryption_spec_key_name: | |
defaultValue: '' | |
description: 'Customer-managed encryption key | |
for the Dataflow job. If this is set, then all resources created by the | |
Dataflow job will be encrypted with the provided encryption key.' | |
isOptional: true | |
parameterType: STRING | |
force_direct_runner: | |
defaultValue: false | |
description: 'Flag to use Beam DirectRunner. If set to true, | |
use Apache Beam DirectRunner to execute the task locally instead of | |
launching a Dataflow job.' | |
isOptional: true | |
parameterType: BOOLEAN | |
location: | |
defaultValue: us-central1 | |
description: 'Location running feature attribution. If not | |
set, defaulted to `us-central1`.' | |
isOptional: true | |
parameterType: STRING | |
predictions_format: | |
defaultValue: jsonl | |
description: 'The file format for the batch | |
prediction results. `jsonl`, `csv`, and `bigquery` are the allowed | |
formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' | |
isOptional: true | |
parameterType: STRING | |
project: | |
description: Project to run feature attribution container. | |
parameterType: STRING | |
outputDefinitions: | |
artifacts: | |
feature_attributions: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
parameters: | |
gcp_resources: | |
description: 'Serialized gcp_resources proto tracking the dataflow | |
job. For more details, see | |
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' | |
parameterType: STRING | |
comp-feature-attribution-2: | |
executorLabel: exec-feature-attribution-2 | |
inputDefinitions: | |
artifacts: | |
predictions_bigquery_source: | |
artifactType: | |
schemaTitle: google.BQTable | |
schemaVersion: 0.0.1 | |
description: 'BigQuery table | |
with prediction or explanation data to be used for this evaluation. For | |
prediction results, the table column should be named "predicted_*".' | |
isOptional: true | |
predictions_gcs_source: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: 'An artifact with its | |
URI pointing toward a GCS directory with prediction or explanation files | |
to be used for this evaluation. For prediction results, the files should | |
be named "prediction.results-*" or "predictions_". For explanation | |
results, the files should be named "explanation.results-*".' | |
isOptional: true | |
parameters: | |
dataflow_disk_size: | |
defaultValue: 50.0 | |
description: 'The disk size (in GB) of the machine | |
executing the evaluation run. If not set, defaulted to `50`.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
dataflow_machine_type: | |
defaultValue: n1-standard-4 | |
description: 'The machine type executing the | |
evaluation run. If not set, defaulted to `n1-standard-4`.' | |
isOptional: true | |
parameterType: STRING | |
dataflow_max_workers_num: | |
defaultValue: 5.0 | |
description: 'The max number of workers | |
            executing the evaluation run. If not set, defaulted to `5`.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
dataflow_service_account: | |
defaultValue: '' | |
description: 'Service account to run the | |
dataflow job. If not set, dataflow will use the default worker service | |
account. For more details, see | |
https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' | |
isOptional: true | |
parameterType: STRING | |
dataflow_subnetwork: | |
defaultValue: '' | |
description: 'Dataflow''s fully qualified subnetwork | |
name, when empty the default subnetwork will be used. More details: | |
https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' | |
isOptional: true | |
parameterType: STRING | |
dataflow_use_public_ips: | |
defaultValue: true | |
description: 'Specifies whether Dataflow | |
workers use public IP addresses.' | |
isOptional: true | |
parameterType: BOOLEAN | |
dataflow_workers_num: | |
defaultValue: 1.0 | |
description: 'The number of workers executing the | |
            evaluation run. If not set, defaulted to `1`.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
encryption_spec_key_name: | |
defaultValue: '' | |
description: 'Customer-managed encryption key | |
for the Dataflow job. If this is set, then all resources created by the | |
Dataflow job will be encrypted with the provided encryption key.' | |
isOptional: true | |
parameterType: STRING | |
force_direct_runner: | |
defaultValue: false | |
description: 'Flag to use Beam DirectRunner. If set to true, | |
use Apache Beam DirectRunner to execute the task locally instead of | |
launching a Dataflow job.' | |
isOptional: true | |
parameterType: BOOLEAN | |
location: | |
defaultValue: us-central1 | |
description: 'Location running feature attribution. If not | |
set, defaulted to `us-central1`.' | |
isOptional: true | |
parameterType: STRING | |
predictions_format: | |
defaultValue: jsonl | |
description: 'The file format for the batch | |
prediction results. `jsonl`, `csv`, and `bigquery` are the allowed | |
formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' | |
isOptional: true | |
parameterType: STRING | |
project: | |
description: Project to run feature attribution container. | |
parameterType: STRING | |
outputDefinitions: | |
artifacts: | |
feature_attributions: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
parameters: | |
gcp_resources: | |
description: 'Serialized gcp_resources proto tracking the dataflow | |
job. For more details, see | |
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' | |
parameterType: STRING | |
comp-feature-attribution-3: | |
executorLabel: exec-feature-attribution-3 | |
inputDefinitions: | |
artifacts: | |
predictions_bigquery_source: | |
artifactType: | |
schemaTitle: google.BQTable | |
schemaVersion: 0.0.1 | |
description: 'BigQuery table | |
with prediction or explanation data to be used for this evaluation. For | |
prediction results, the table column should be named "predicted_*".' | |
isOptional: true | |
predictions_gcs_source: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: 'An artifact with its | |
URI pointing toward a GCS directory with prediction or explanation files | |
to be used for this evaluation. For prediction results, the files should | |
be named "prediction.results-*" or "predictions_". For explanation | |
results, the files should be named "explanation.results-*".' | |
isOptional: true | |
parameters: | |
dataflow_disk_size: | |
defaultValue: 50.0 | |
description: 'The disk size (in GB) of the machine | |
executing the evaluation run. If not set, defaulted to `50`.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
dataflow_machine_type: | |
defaultValue: n1-standard-4 | |
description: 'The machine type executing the | |
evaluation run. If not set, defaulted to `n1-standard-4`.' | |
isOptional: true | |
parameterType: STRING | |
dataflow_max_workers_num: | |
defaultValue: 5.0 | |
description: 'The max number of workers | |
executing the evaluation run. If not set, defaulted to `5`.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
dataflow_service_account: | |
defaultValue: '' | |
description: 'Service account to run the | |
dataflow job. If not set, dataflow will use the default worker service | |
account. For more details, see | |
https://cloud.google.com/dataflow/docs/concepts/security-and-permissions#default_worker_service_account' | |
isOptional: true | |
parameterType: STRING | |
dataflow_subnetwork: | |
defaultValue: '' | |
description: 'Dataflow''s fully qualified subnetwork | |
name; when empty, the default subnetwork will be used. More details: | |
https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' | |
isOptional: true | |
parameterType: STRING | |
dataflow_use_public_ips: | |
defaultValue: true | |
description: 'Specifies whether Dataflow | |
workers use public IP addresses.' | |
isOptional: true | |
parameterType: BOOLEAN | |
dataflow_workers_num: | |
defaultValue: 1.0 | |
description: 'The number of workers executing the | |
evaluation run. If not set, defaulted to `1`.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
encryption_spec_key_name: | |
defaultValue: '' | |
description: 'Customer-managed encryption key | |
for the Dataflow job. If this is set, then all resources created by the | |
Dataflow job will be encrypted with the provided encryption key.' | |
isOptional: true | |
parameterType: STRING | |
force_direct_runner: | |
defaultValue: false | |
description: 'Flag to use Beam DirectRunner. If set to true, | |
use Apache Beam DirectRunner to execute the task locally instead of | |
launching a Dataflow job.' | |
isOptional: true | |
parameterType: BOOLEAN | |
location: | |
defaultValue: us-central1 | |
description: 'Location running feature attribution. If not | |
set, defaulted to `us-central1`.' | |
isOptional: true | |
parameterType: STRING | |
predictions_format: | |
defaultValue: jsonl | |
description: 'The file format for the batch | |
prediction results. `jsonl`, `csv`, and `bigquery` are the allowed | |
formats, from Vertex Batch Prediction. If not set, defaulted to `jsonl`.' | |
isOptional: true | |
parameterType: STRING | |
project: | |
description: Project to run feature attribution container. | |
parameterType: STRING | |
outputDefinitions: | |
artifacts: | |
feature_attributions: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
parameters: | |
gcp_resources: | |
description: 'Serialized gcp_resources proto tracking the dataflow | |
job. For more details, see | |
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' | |
parameterType: STRING | |
comp-importer: | |
executorLabel: exec-importer | |
inputDefinitions: | |
parameters: | |
uri: | |
parameterType: STRING | |
outputDefinitions: | |
artifacts: | |
artifact: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
comp-merge-materialized-splits: | |
executorLabel: exec-merge-materialized-splits | |
inputDefinitions: | |
artifacts: | |
split_0: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The first materialized split. | |
split_1: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The second materialized split. | |
outputDefinitions: | |
artifacts: | |
splits: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
comp-model-batch-explanation: | |
executorLabel: exec-model-batch-explanation | |
inputDefinitions: | |
artifacts: | |
explanation_metadata_artifact: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
isOptional: true | |
unmanaged_container_model: | |
artifactType: | |
schemaTitle: google.UnmanagedContainerModel | |
schemaVersion: 0.0.1 | |
isOptional: true | |
parameters: | |
accelerator_count: | |
defaultValue: 0.0 | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
accelerator_type: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
bigquery_destination_output_uri: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
bigquery_source_input_uri: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
encryption_spec_key_name: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
explanation_metadata: | |
defaultValue: {} | |
isOptional: true | |
parameterType: STRUCT | |
explanation_parameters: | |
defaultValue: {} | |
isOptional: true | |
parameterType: STRUCT | |
gcs_destination_output_uri_prefix: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
gcs_source_uris: | |
defaultValue: [] | |
isOptional: true | |
parameterType: LIST | |
generate_explanation: | |
defaultValue: false | |
isOptional: true | |
parameterType: BOOLEAN | |
instances_format: | |
defaultValue: jsonl | |
isOptional: true | |
parameterType: STRING | |
job_display_name: | |
parameterType: STRING | |
labels: | |
defaultValue: {} | |
isOptional: true | |
parameterType: STRUCT | |
location: | |
defaultValue: us-central1 | |
isOptional: true | |
parameterType: STRING | |
machine_type: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
manual_batch_tuning_parameters_batch_size: | |
defaultValue: 0.0 | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
max_replica_count: | |
defaultValue: 0.0 | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
model_parameters: | |
defaultValue: {} | |
isOptional: true | |
parameterType: STRUCT | |
predictions_format: | |
defaultValue: jsonl | |
isOptional: true | |
parameterType: STRING | |
project: | |
parameterType: STRING | |
starting_replica_count: | |
defaultValue: 0.0 | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
outputDefinitions: | |
artifacts: | |
batchpredictionjob: | |
artifactType: | |
schemaTitle: google.VertexBatchPredictionJob | |
schemaVersion: 0.0.1 | |
bigquery_output_table: | |
artifactType: | |
schemaTitle: google.BQTable | |
schemaVersion: 0.0.1 | |
gcs_output_directory: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
parameters: | |
gcp_resources: | |
parameterType: STRING | |
comp-model-batch-explanation-2: | |
executorLabel: exec-model-batch-explanation-2 | |
inputDefinitions: | |
artifacts: | |
explanation_metadata_artifact: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
isOptional: true | |
unmanaged_container_model: | |
artifactType: | |
schemaTitle: google.UnmanagedContainerModel | |
schemaVersion: 0.0.1 | |
isOptional: true | |
parameters: | |
accelerator_count: | |
defaultValue: 0.0 | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
accelerator_type: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
bigquery_destination_output_uri: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
bigquery_source_input_uri: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
encryption_spec_key_name: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
explanation_metadata: | |
defaultValue: {} | |
isOptional: true | |
parameterType: STRUCT | |
explanation_parameters: | |
defaultValue: {} | |
isOptional: true | |
parameterType: STRUCT | |
gcs_destination_output_uri_prefix: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
gcs_source_uris: | |
defaultValue: [] | |
isOptional: true | |
parameterType: LIST | |
generate_explanation: | |
defaultValue: false | |
isOptional: true | |
parameterType: BOOLEAN | |
instances_format: | |
defaultValue: jsonl | |
isOptional: true | |
parameterType: STRING | |
job_display_name: | |
parameterType: STRING | |
labels: | |
defaultValue: {} | |
isOptional: true | |
parameterType: STRUCT | |
location: | |
defaultValue: us-central1 | |
isOptional: true | |
parameterType: STRING | |
machine_type: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
manual_batch_tuning_parameters_batch_size: | |
defaultValue: 0.0 | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
max_replica_count: | |
defaultValue: 0.0 | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
model_parameters: | |
defaultValue: {} | |
isOptional: true | |
parameterType: STRUCT | |
predictions_format: | |
defaultValue: jsonl | |
isOptional: true | |
parameterType: STRING | |
project: | |
parameterType: STRING | |
starting_replica_count: | |
defaultValue: 0.0 | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
outputDefinitions: | |
artifacts: | |
batchpredictionjob: | |
artifactType: | |
schemaTitle: google.VertexBatchPredictionJob | |
schemaVersion: 0.0.1 | |
bigquery_output_table: | |
artifactType: | |
schemaTitle: google.BQTable | |
schemaVersion: 0.0.1 | |
gcs_output_directory: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
parameters: | |
gcp_resources: | |
parameterType: STRING | |
comp-model-batch-explanation-3: | |
executorLabel: exec-model-batch-explanation-3 | |
inputDefinitions: | |
artifacts: | |
explanation_metadata_artifact: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
isOptional: true | |
unmanaged_container_model: | |
artifactType: | |
schemaTitle: google.UnmanagedContainerModel | |
schemaVersion: 0.0.1 | |
isOptional: true | |
parameters: | |
accelerator_count: | |
defaultValue: 0.0 | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
accelerator_type: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
bigquery_destination_output_uri: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
bigquery_source_input_uri: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
encryption_spec_key_name: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
explanation_metadata: | |
defaultValue: {} | |
isOptional: true | |
parameterType: STRUCT | |
explanation_parameters: | |
defaultValue: {} | |
isOptional: true | |
parameterType: STRUCT | |
gcs_destination_output_uri_prefix: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
gcs_source_uris: | |
defaultValue: [] | |
isOptional: true | |
parameterType: LIST | |
generate_explanation: | |
defaultValue: false | |
isOptional: true | |
parameterType: BOOLEAN | |
instances_format: | |
defaultValue: jsonl | |
isOptional: true | |
parameterType: STRING | |
job_display_name: | |
parameterType: STRING | |
labels: | |
defaultValue: {} | |
isOptional: true | |
parameterType: STRUCT | |
location: | |
defaultValue: us-central1 | |
isOptional: true | |
parameterType: STRING | |
machine_type: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
manual_batch_tuning_parameters_batch_size: | |
defaultValue: 0.0 | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
max_replica_count: | |
defaultValue: 0.0 | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
model_parameters: | |
defaultValue: {} | |
isOptional: true | |
parameterType: STRUCT | |
predictions_format: | |
defaultValue: jsonl | |
isOptional: true | |
parameterType: STRING | |
project: | |
parameterType: STRING | |
starting_replica_count: | |
defaultValue: 0.0 | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
outputDefinitions: | |
artifacts: | |
batchpredictionjob: | |
artifactType: | |
schemaTitle: google.VertexBatchPredictionJob | |
schemaVersion: 0.0.1 | |
bigquery_output_table: | |
artifactType: | |
schemaTitle: google.BQTable | |
schemaVersion: 0.0.1 | |
gcs_output_directory: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
parameters: | |
gcp_resources: | |
parameterType: STRING | |
comp-model-batch-predict: | |
executorLabel: exec-model-batch-predict | |
inputDefinitions: | |
artifacts: | |
model: | |
artifactType: | |
schemaTitle: google.VertexModel | |
schemaVersion: 0.0.1 | |
description: 'The Model used to get predictions via this job. Must share | |
the same | |
ancestor Location. Starting this job has no impact on any existing | |
deployments of the Model and their resources. Either this or | |
unmanaged_container_model must be specified.' | |
isOptional: true | |
unmanaged_container_model: | |
artifactType: | |
schemaTitle: google.UnmanagedContainerModel | |
schemaVersion: 0.0.1 | |
description: 'The unmanaged container model used to get predictions via | |
this job. | |
This should be used for models that are not uploaded to Vertex. Either | |
this or model must be specified.' | |
isOptional: true | |
parameters: | |
accelerator_count: | |
defaultValue: 0.0 | |
description: 'The number of accelerators to attach | |
to the `machine_type`. Only used if `machine_type` is set. For more | |
details about the machine spec, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
accelerator_type: | |
defaultValue: '' | |
description: 'The type of accelerator(s) that may be | |
attached to the machine as per `accelerator_count`. Only used if | |
`machine_type` is set. For more details about the machine spec, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' | |
isOptional: true | |
parameterType: STRING | |
bigquery_destination_output_uri: | |
defaultValue: '' | |
description: 'The BigQuery project location where the output is to be written | |
to. In | |
the given project a new dataset is created with name | |
``prediction_<model-display-name>_<job-create-time>`` where ``<model-display-name>`` is made | |
BigQuery-dataset-name compatible (for example, most special characters | |
become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ | |
"based on ISO-8601" format. In the dataset two tables will be created, | |
``predictions``, and ``errors``. If the Model has both ``instance`` | |
and ``prediction`` schemata defined then the tables have columns as | |
follows: The ``predictions`` table contains instances for which the | |
prediction succeeded, it has columns as per a concatenation of the | |
Model''s instance and prediction schemata. The ``errors`` table | |
contains rows for which the prediction has failed, it has instance | |
columns, as per the instance schema, followed by a single "errors" | |
column, which as values has ``google.rpc.Status`` | |
represented as a STRUCT, and containing only ``code`` and | |
``message``. For more details about this output config, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' | |
isOptional: true | |
parameterType: STRING | |
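# Illustrative example, with assumed project and model names: given | |
#   bigquery_destination_output_uri: bq://my-project | |
# the job would create a dataset named roughly | |
#   prediction_my_model_2024_01_01T00_00_00_000Z | |
# containing `predictions` and `errors` tables, as described above. | |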
bigquery_source_input_uri: | |
defaultValue: '' | |
description: 'BigQuery URI to a table, up to 2000 characters long. For example: | |
`projectId.bqDatasetId.bqTableId` For more details about this input | |
config, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' | |
isOptional: true | |
parameterType: STRING | |
encryption_spec_key_name: | |
defaultValue: '' | |
description: 'Customer-managed encryption | |
key options for a BatchPredictionJob. If this is set, then all | |
resources created by the BatchPredictionJob will be encrypted with the | |
provided encryption key. Has the form: | |
``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. | |
The key needs to be in the same region as where the compute resource | |
is created.' | |
isOptional: true | |
parameterType: STRING | |
excluded_fields: | |
defaultValue: [] | |
description: 'Fields that will be excluded in the prediction instance that | |
is | |
sent to the Model. | |
Excluded fields will be attached to the batch prediction output if | |
[key_field][] is not specified. | |
When excluded_fields is populated, [included_fields][] must be empty. | |
The input must be JSONL with objects at each line, CSV, BigQuery | |
or TfRecord.' | |
isOptional: true | |
parameterType: LIST | |
explanation_metadata: | |
defaultValue: {} | |
description: 'Explanation metadata | |
configuration for this BatchPredictionJob. Can be specified only if | |
`generate_explanation` is set to `True`. This value overrides the | |
value of `Model.explanation_metadata`. All fields of | |
`explanation_metadata` are optional in the request. If a field of the | |
`explanation_metadata` object is not populated, the corresponding | |
field of the `Model.explanation_metadata` object is inherited. For | |
more details, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' | |
isOptional: true | |
parameterType: STRUCT | |
explanation_parameters: | |
defaultValue: {} | |
description: 'Parameters to configure | |
explaining for Model''s predictions. Can be specified only if | |
`generate_explanation` is set to `True`. This value overrides the | |
value of `Model.explanation_parameters`. All fields of | |
`explanation_parameters` are optional in the request. If a field of | |
the `explanation_parameters` object is not populated, the | |
corresponding field of the `Model.explanation_parameters` object is | |
inherited. For more details, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' | |
isOptional: true | |
parameterType: STRUCT | |
gcs_destination_output_uri_prefix: | |
defaultValue: '' | |
description: 'The Google Cloud | |
Storage location of the directory where the output is to be written | |
to. In the given directory a new directory is created. Its name is | |
``prediction-<model-display-name>-<job-create-time>``, where timestamp | |
is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files | |
``predictions_0001.<extension>``, ``predictions_0002.<extension>``, | |
..., ``predictions_N.<extension>`` are created where ``<extension>`` | |
depends on chosen ``predictions_format``, and N may equal 0001 and | |
depends on the total number of successfully predicted instances. If | |
the Model has both ``instance`` and ``prediction`` schemata defined | |
then each such file contains predictions as per the | |
``predictions_format``. If prediction for any instance failed | |
(partially or completely), then an additional | |
``errors_0001.<extension>``, ``errors_0002.<extension>``,..., | |
``errors_N.<extension>`` files are created (N depends on total number | |
of failed predictions). These files contain the failed instances, as | |
per their schema, followed by an additional ``error`` field which as | |
value has ``google.rpc.Status`` containing only ``code`` and | |
``message`` fields. For more details about this output config, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' | |
isOptional: true | |
parameterType: STRING | |
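# Illustrative example, with an assumed bucket and jsonl output: given | |
#   gcs_destination_output_uri_prefix: gs://my-bucket/batch-output | |
# the job would write files such as | |
#   gs://my-bucket/batch-output/prediction-<model-display-name>-<job-create-time>/predictions_0001.jsonl | |
# plus errors_*.jsonl files for any instances that failed, per the description above. | |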
gcs_source_uris: | |
defaultValue: [] | |
description: "Google Cloud Storage URI(-s) to your instances to run batch\ | |
\ prediction\non. They must match `instances_format`. May contain wildcards.\ | |
\ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ | |
For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." | |
isOptional: true | |
parameterType: LIST | |
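# Illustrative example with an assumed bucket; wildcards are allowed, e.g. | |
#   gcs_source_uris: ["gs://my-bucket/inputs/*.jsonl"] | |
# where every matched file must be in the declared instances_format. | |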
generate_explanation: | |
defaultValue: false | |
description: 'Generate explanation along with | |
the batch prediction results. This will cause the batch prediction | |
output to include explanations based on the `predictions_format`: - | |
`bigquery`: output includes a column named `explanation`. The value is | |
a struct that conforms to the [aiplatform.gapic.Explanation] object. - | |
`jsonl`: The JSON objects on each line include an additional entry | |
keyed `explanation`. The value of the entry is a JSON object that | |
conforms to the [aiplatform.gapic.Explanation] object. - `csv`: | |
Generating explanations for CSV format is not supported. If this | |
field is set to true, either the Model.explanation_spec or | |
explanation_metadata and explanation_parameters must be populated.' | |
isOptional: true | |
parameterType: BOOLEAN | |
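# Illustrative sketch of a jsonl output line when generate_explanation is true | |
# (field layout assumed from the description above, not verified): | |
#   {"instance": {...}, "prediction": {...}, "explanation": {"attributions": [...]}} | |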
included_fields: | |
defaultValue: [] | |
description: 'Fields that will be included in the prediction instance that | |
is | |
sent to the Model. | |
If [instance_type][] is `array`, the order of field names in | |
included_fields also determines the order of the values in the array. | |
When included_fields is populated, [excluded_fields][] must be empty. | |
The input must be JSONL with objects at each line, CSV, BigQuery | |
or TfRecord.' | |
isOptional: true | |
parameterType: LIST | |
instance_type: | |
defaultValue: '' | |
description: "The format of the instance that the Model accepts. Vertex\ | |
\ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ | |
to the specified format.\nSupported values are:\n** `object`: Each input\ | |
\ is converted to JSON object format.\n* For `bigquery`, each row is converted\ | |
\ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ | |
\ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ | |
** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ | |
\ each row is converted to an array. The order\n of columns is determined\ | |
\ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ | |
\ [included_fields][] must be populated for specifying field orders.\n\ | |
* For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ | |
\ must be populated for specifying field orders.\n* Does not apply to\ | |
\ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ | |
\ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ | |
\ and `csv`, the behavior is the same as `array`. The\n order of columns\ | |
\ is the same as defined in the file or table, unless\n [included_fields][]\ | |
\ is populated.\n * For `jsonl`, the prediction instance format is determined\ | |
\ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ | |
\ each record will be converted to\n an object in the format of `{\"\ | |
b64\": <value>}`, where `<value>` is\n the Base64-encoded string of\ | |
\ the content of the record.\n * For `file-list`, each file in the list\ | |
\ will be converted to an\n object in the format of `{\"b64\": <value>}`,\ | |
\ where `<value>` is\n the Base64-encoded string of the content of the\ | |
\ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ | |
\ Base64 is not for this field. --)" | |
isOptional: true | |
parameterType: STRING | |
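# Illustrative sketch of the two conversions described above, for an assumed | |
# BigQuery row with columns a=1 and b="x": | |
#   instance_type `object` sends {"a": 1, "b": "x"} | |
#   instance_type `array` sends [1, "x"] (BigQuery column order, unless included_fields is set) | |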
instances_format: | |
defaultValue: jsonl | |
description: "The format in which instances are\ngiven, must be one of the\ | |
\ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ | |
. For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." | |
isOptional: true | |
parameterType: STRING | |
job_display_name: | |
description: The user-defined name of this BatchPredictionJob. | |
parameterType: STRING | |
key_field: | |
defaultValue: '' | |
description: "The name of the field that is considered as a key.\nThe values\ | |
\ identified by the key field are not included in the\ntransformed instances\ | |
\ that are sent to the Model. This is similar to\nspecifying the name\ | |
\ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ | |
\ output will not include the instances. Instead the\noutput will only\ | |
\ include the value of the key field, in a field named\n`key` in the output:\n\ | |
\ * For `jsonl` output format, the output will have a `key` field\n \ | |
\ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ | |
\ the output will have a `key`\n column instead of the instance\ | |
\ feature columns.\nThe input must be JSONL with objects at each line,\ | |
\ CSV, BigQuery\nor TfRecord." | |
isOptional: true | |
parameterType: STRING | |
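# Illustrative sketch with an assumed key_field of "id" and jsonl output: an | |
# output line would carry only the key instead of the full instance, e.g. | |
#   {"key": "id-123", "prediction": {...}} | |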
labels: | |
defaultValue: {} | |
description: 'The labels with user-defined metadata to | |
organize your BatchPredictionJobs. Label keys and values can be no | |
longer than 64 characters (Unicode codepoints), can only contain | |
lowercase letters, numeric characters, underscores and dashes. | |
International characters are allowed. See https://goo.gl/xmQnxf for | |
more information and examples of labels.' | |
isOptional: true | |
parameterType: STRUCT | |
location: | |
defaultValue: us-central1 | |
description: 'Location for creating the BatchPredictionJob. | |
If not set, default to us-central1.' | |
isOptional: true | |
parameterType: STRING | |
machine_type: | |
defaultValue: '' | |
description: 'The type of machine for running batch | |
prediction on dedicated resources. If the Model supports | |
DEDICATED_RESOURCES this config may be provided (and the job will use | |
these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, | |
this config must be provided. For more details about the | |
BatchDedicatedResources, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. | |
For more details about the machine spec, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' | |
isOptional: true | |
parameterType: STRING | |
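# Illustrative example of a dedicated-resources setup (values assumed; only used | |
# when machine_type is set): | |
#   machine_type: n1-standard-4 | |
#   accelerator_type: NVIDIA_TESLA_T4 | |
#   accelerator_count: 1 | |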
manual_batch_tuning_parameters_batch_size: | |
defaultValue: 0.0 | |
description: 'The number of | |
the records (e.g. instances) of the operation given in each batch to a | |
machine replica. Machine type, and size of a single record should be | |
considered when setting this parameter, higher value speeds up the | |
batch operation''s execution, but too high value will result in a whole | |
batch not fitting in a machine''s memory, and the whole operation will | |
fail. The default value is 4.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
max_replica_count: | |
defaultValue: 0.0 | |
description: 'The maximum number of machine replicas the batch operation | |
may be scaled | |
to. Only used if `machine_type` is set. Default is 10.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
model_parameters: | |
defaultValue: {} | |
description: 'The parameters that govern the predictions. The schema of the | |
parameters may be specified via the Model''s `parameters_schema_uri`.' | |
isOptional: true | |
parameterType: STRUCT | |
predictions_format: | |
defaultValue: jsonl | |
description: "The format in which Vertex AI gives the predictions. Must\ | |
\ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ | |
\ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." | |
isOptional: true | |
parameterType: STRING | |
project: | |
description: Project to create the BatchPredictionJob. | |
parameterType: STRING | |
starting_replica_count: | |
defaultValue: 0.0 | |
description: 'The number of machine replicas | |
used at the start of the batch operation. If not set, Vertex AI | |
decides starting number, not greater than `max_replica_count`. Only | |
used if `machine_type` is set.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
outputDefinitions: | |
artifacts: | |
batchpredictionjob: | |
artifactType: | |
schemaTitle: google.VertexBatchPredictionJob | |
schemaVersion: 0.0.1 | |
description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table | |
instead.**] Artifact | |
representation of the created batch prediction job.' | |
bigquery_output_table: | |
artifactType: | |
schemaTitle: google.BQTable | |
schemaVersion: 0.0.1 | |
description: 'Artifact tracking the batch prediction job output. This is | |
only | |
available if | |
bigquery_output_table is specified.' | |
gcs_output_directory: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: 'Artifact tracking the batch prediction job output. This is | |
only | |
available if | |
gcs_destination_output_uri_prefix is specified.' | |
parameters: | |
gcp_resources: | |
description: 'Serialized gcp_resources proto tracking the batch prediction | |
job. | |
For more details, see | |
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' | |
parameterType: STRING | |
comp-model-batch-predict-2: | |
executorLabel: exec-model-batch-predict-2 | |
inputDefinitions: | |
artifacts: | |
model: | |
artifactType: | |
schemaTitle: google.VertexModel | |
schemaVersion: 0.0.1 | |
description: 'The Model used to get predictions via this job. Must share | |
the same | |
ancestor Location. Starting this job has no impact on any existing | |
deployments of the Model and their resources. Either this or | |
unmanaged_container_model must be specified.' | |
isOptional: true | |
unmanaged_container_model: | |
artifactType: | |
schemaTitle: google.UnmanagedContainerModel | |
schemaVersion: 0.0.1 | |
description: 'The unmanaged container model used to get predictions via | |
this job. | |
This should be used for models that are not uploaded to Vertex. Either | |
this or model must be specified.' | |
isOptional: true | |
parameters: | |
accelerator_count: | |
defaultValue: 0.0 | |
description: 'The number of accelerators to attach | |
to the `machine_type`. Only used if `machine_type` is set. For more | |
details about the machine spec, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
accelerator_type: | |
defaultValue: '' | |
description: 'The type of accelerator(s) that may be | |
attached to the machine as per `accelerator_count`. Only used if | |
`machine_type` is set. For more details about the machine spec, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' | |
isOptional: true | |
parameterType: STRING | |
bigquery_destination_output_uri: | |
defaultValue: '' | |
description: 'The BigQuery project location where the output is to be written | |
to. In | |
the given project a new dataset is created with name | |
``prediction_<model-display-name>_<job-create-time>`` where ``<model-display-name>`` is made | |
BigQuery-dataset-name compatible (for example, most special characters | |
become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ | |
"based on ISO-8601" format. In the dataset two tables will be created, | |
``predictions``, and ``errors``. If the Model has both ``instance`` | |
and ``prediction`` schemata defined then the tables have columns as | |
follows: The ``predictions`` table contains instances for which the | |
prediction succeeded, it has columns as per a concatenation of the | |
Model''s instance and prediction schemata. The ``errors`` table | |
contains rows for which the prediction has failed, it has instance | |
columns, as per the instance schema, followed by a single "errors" | |
column, which as values has ``google.rpc.Status`` | |
represented as a STRUCT, and containing only ``code`` and | |
``message``. For more details about this output config, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' | |
isOptional: true | |
parameterType: STRING | |
bigquery_source_input_uri: | |
defaultValue: '' | |
description: 'BigQuery URI to a table, up to 2000 characters long. For example: | |
`projectId.bqDatasetId.bqTableId` For more details about this input | |
config, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' | |
isOptional: true | |
parameterType: STRING | |
encryption_spec_key_name: | |
defaultValue: '' | |
description: 'Customer-managed encryption | |
key options for a BatchPredictionJob. If this is set, then all | |
resources created by the BatchPredictionJob will be encrypted with the | |
provided encryption key. Has the form: | |
``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. | |
The key needs to be in the same region as where the compute resource | |
is created.' | |
isOptional: true | |
parameterType: STRING | |
excluded_fields: | |
defaultValue: [] | |
description: 'Fields that will be excluded in the prediction instance that | |
is | |
sent to the Model. | |
Excluded fields will be attached to the batch prediction output if | |
[key_field][] is not specified. | |
When excluded_fields is populated, [included_fields][] must be empty. | |
The input must be JSONL with objects at each line, CSV, BigQuery | |
or TfRecord.' | |
isOptional: true | |
parameterType: LIST | |
explanation_metadata: | |
defaultValue: {} | |
description: 'Explanation metadata | |
configuration for this BatchPredictionJob. Can be specified only if | |
`generate_explanation` is set to `True`. This value overrides the | |
value of `Model.explanation_metadata`. All fields of | |
`explanation_metadata` are optional in the request. If a field of the | |
`explanation_metadata` object is not populated, the corresponding | |
field of the `Model.explanation_metadata` object is inherited. For | |
more details, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' | |
isOptional: true | |
parameterType: STRUCT | |
explanation_parameters: | |
defaultValue: {} | |
description: 'Parameters to configure | |
explaining for Model''s predictions. Can be specified only if | |
`generate_explanation` is set to `True`. This value overrides the | |
value of `Model.explanation_parameters`. All fields of | |
`explanation_parameters` are optional in the request. If a field of | |
the `explanation_parameters` object is not populated, the | |
corresponding field of the `Model.explanation_parameters` object is | |
inherited. For more details, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' | |
isOptional: true | |
parameterType: STRUCT | |
gcs_destination_output_uri_prefix: | |
defaultValue: '' | |
description: 'The Google Cloud | |
Storage location of the directory where the output is to be written | |
to. In the given directory a new directory is created. Its name is | |
``prediction-<model-display-name>-<job-create-time>``, where timestamp | |
is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files | |
``predictions_0001.<extension>``, ``predictions_0002.<extension>``, | |
..., ``predictions_N.<extension>`` are created where ``<extension>`` | |
depends on chosen ``predictions_format``, and N may equal 0001 and | |
depends on the total number of successfully predicted instances. If | |
the Model has both ``instance`` and ``prediction`` schemata defined | |
then each such file contains predictions as per the | |
``predictions_format``. If prediction for any instance failed | |
(partially or completely), then an additional | |
``errors_0001.<extension>``, ``errors_0002.<extension>``,..., | |
``errors_N.<extension>`` files are created (N depends on total number | |
of failed predictions). These files contain the failed instances, as | |
per their schema, followed by an additional ``error`` field which as | |
value has ``google.rpc.Status`` containing only ``code`` and | |
``message`` fields. For more details about this output config, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' | |
isOptional: true | |
parameterType: STRING | |
gcs_source_uris: | |
defaultValue: [] | |
description: "Google Cloud Storage URI(-s) to your instances to run batch\ | |
\ prediction\non. They must match `instances_format`. May contain wildcards.\ | |
\ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ | |
For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." | |
isOptional: true | |
parameterType: LIST | |
generate_explanation: | |
defaultValue: false | |
description: 'Generate explanation along with | |
the batch prediction results. This will cause the batch prediction | |
output to include explanations based on the `predictions_format`: - | |
`bigquery`: output includes a column named `explanation`. The value is | |
a struct that conforms to the [aiplatform.gapic.Explanation] object. - | |
`jsonl`: The JSON objects on each line include an additional entry | |
keyed `explanation`. The value of the entry is a JSON object that | |
conforms to the [aiplatform.gapic.Explanation] object. - `csv`: | |
Generating explanations for CSV format is not supported. If this | |
field is set to true, either the Model.explanation_spec or | |
explanation_metadata and explanation_parameters must be populated.' | |
isOptional: true | |
parameterType: BOOLEAN | |
included_fields: | |
defaultValue: [] | |
description: 'Fields that will be included in the prediction instance that | |
is | |
sent to the Model. | |
If [instance_type][] is `array`, the order of field names in | |
included_fields also determines the order of the values in the array. | |
When included_fields is populated, [excluded_fields][] must be empty. | |
The input must be JSONL with objects at each line, CSV, BigQuery | |
or TfRecord.' | |
isOptional: true | |
parameterType: LIST | |
instance_type: | |
defaultValue: '' | |
description: "The format of the instance that the Model accepts. Vertex\ | |
\ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ | |
to the specified format.\nSupported values are:\n** `object`: Each input\ | |
\ is converted to JSON object format.\n* For `bigquery`, each row is converted\ | |
\ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ | |
\ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ | |
** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ | |
\ each row is converted to an array. The order\n of columns is determined\ | |
\ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ | |
\ [included_fields][] must be populated for specifying field orders.\n\ | |
* For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ | |
\ must be populated for specifying field orders.\n* Does not apply to\ | |
\ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ | |
\ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ | |
\ and `csv`, the behavior is the same as `array`. The\n order of columns\ | |
\ is the same as defined in the file or table, unless\n [included_fields][]\ | |
\ is populated.\n * For `jsonl`, the prediction instance format is determined\ | |
\ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ | |
\ each record will be converted to\n an object in the format of `{\"\ | |
b64\": <value>}`, where `<value>` is\n the Base64-encoded string of\ | |
\ the content of the record.\n * For `file-list`, each file in the list\ | |
\ will be converted to an\n object in the format of `{\"b64\": <value>}`,\ | |
\ where `<value>` is\n the Base64-encoded string of the content of the\ | |
\ file.\n(-- api-linter: core::0140::base64=disabled\n aip.dev/not-precedent:\ | |
\ Base64 is not for this field. --)" | |
isOptional: true | |
parameterType: STRING | |
instances_format: | |
defaultValue: jsonl | |
description: "The format in which instances are\ngiven, must be one of the\ | |
\ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ | |
. For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." | |
isOptional: true | |
parameterType: STRING | |
job_display_name: | |
description: The user-defined name of this BatchPredictionJob. | |
parameterType: STRING | |
key_field: | |
defaultValue: '' | |
description: "The name of the field that is considered as a key.\nThe values\ | |
\ identified by the key field are not included in the\ntransformed instances\ | |
\ that are sent to the Model. This is similar to\nspecifying the name\ | |
\ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ | |
\ output will not include the instances. Instead the\noutput will only\ | |
\ include the value of the key field, in a field named\n`key` in the output:\n\ | |
\ * For `jsonl` output format, the output will have a `key` field\n \ | |
\ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ | |
\ the output will have a `key`\n column instead of the instance\ | |
\ feature columns.\nThe input must be JSONL with objects at each line,\ | |
\ CSV, BigQuery\nor TfRecord." | |
isOptional: true | |
parameterType: STRING | |
labels: | |
defaultValue: {} | |
description: 'The labels with user-defined metadata to | |
organize your BatchPredictionJobs. Label keys and values can be no | |
longer than 64 characters (Unicode codepoints), can only contain | |
lowercase letters, numeric characters, underscores and dashes. | |
International characters are allowed. See https://goo.gl/xmQnxf for | |
more information and examples of labels.' | |
isOptional: true | |
parameterType: STRUCT | |
location: | |
defaultValue: us-central1 | |
description: 'Location for creating the BatchPredictionJob. | |
If not set, default to us-central1.' | |
isOptional: true | |
parameterType: STRING | |
machine_type: | |
defaultValue: '' | |
description: 'The type of machine for running batch | |
prediction on dedicated resources. If the Model supports | |
DEDICATED_RESOURCES this config may be provided (and the job will use | |
these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, | |
this config must be provided. For more details about the | |
BatchDedicatedResources, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. | |
For more details about the machine spec, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' | |
isOptional: true | |
parameterType: STRING | |
manual_batch_tuning_parameters_batch_size: | |
defaultValue: 0.0 | |
description: 'The number of | |
the records (e.g. instances) of the operation given in each batch to a | |
machine replica. Machine type, and size of a single record should be | |
considered when setting this parameter, higher value speeds up the | |
batch operation''s execution, but too high value will result in a whole | |
batch not fitting in a machine''s memory, and the whole operation will | |
fail. The default value is 4.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
max_replica_count: | |
defaultValue: 0.0 | |
description: 'The maximum number of machine replicas the batch operation | |
may be scaled | |
to. Only used if `machine_type` is set. Default is 10.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
model_parameters: | |
defaultValue: {} | |
description: 'The parameters that govern the predictions. The schema of the | |
parameters may be specified via the Model''s `parameters_schema_uri`.' | |
isOptional: true | |
parameterType: STRUCT | |
predictions_format: | |
defaultValue: jsonl | |
description: "The format in which Vertex AI gives the predictions. Must\ | |
\ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ | |
\ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." | |
isOptional: true | |
parameterType: STRING | |
project: | |
description: Project to create the BatchPredictionJob. | |
parameterType: STRING | |
starting_replica_count: | |
defaultValue: 0.0 | |
description: 'The number of machine replicas | |
used at the start of the batch operation. If not set, Vertex AI | |
decides starting number, not greater than `max_replica_count`. Only | |
used if `machine_type` is set.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
outputDefinitions: | |
artifacts: | |
batchpredictionjob: | |
artifactType: | |
schemaTitle: google.VertexBatchPredictionJob | |
schemaVersion: 0.0.1 | |
description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table | |
instead.**] Artifact | |
representation of the created batch prediction job.' | |
bigquery_output_table: | |
artifactType: | |
schemaTitle: google.BQTable | |
schemaVersion: 0.0.1 | |
description: 'Artifact tracking the batch prediction job output. This is | |
only | |
available if | |
bigquery_output_table is specified.' | |
gcs_output_directory: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: 'Artifact tracking the batch prediction job output. This is | |
only | |
available if | |
gcs_destination_output_uri_prefix is specified.' | |
parameters: | |
gcp_resources: | |
description: 'Serialized gcp_resources proto tracking the batch prediction | |
job. | |
For more details, see | |
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' | |
parameterType: STRING | |
comp-model-batch-predict-3: | |
executorLabel: exec-model-batch-predict-3 | |
inputDefinitions: | |
artifacts: | |
model: | |
artifactType: | |
schemaTitle: google.VertexModel | |
schemaVersion: 0.0.1 | |
description: 'The Model used to get predictions via this job. Must share | |
the same | |
ancestor Location. Starting this job has no impact on any existing | |
deployments of the Model and their resources. Either this or | |
unmanaged_container_model must be specified.' | |
isOptional: true | |
unmanaged_container_model: | |
artifactType: | |
schemaTitle: google.UnmanagedContainerModel | |
schemaVersion: 0.0.1 | |
description: 'The unmanaged container model used to get predictions via | |
this job. | |
This should be used for models that are not uploaded to Vertex. Either | |
this or model must be specified.' | |
isOptional: true | |
parameters: | |
accelerator_count: | |
defaultValue: 0.0 | |
description: 'The number of accelerators to attach | |
to the `machine_type`. Only used if `machine_type` is set. For more | |
details about the machine spec, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
accelerator_type: | |
defaultValue: '' | |
description: 'The type of accelerator(s) that may be | |
attached to the machine as per `accelerator_count`. Only used if | |
`machine_type` is set. For more details about the machine spec, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' | |
isOptional: true | |
parameterType: STRING | |
bigquery_destination_output_uri: | |
defaultValue: '' | |
description: 'The BigQuery project location where the output is to be written | |
to. In | |
the given project a new dataset is created with name | |
``prediction_<model-display-name>_<job-create-time>`` where ``<model-display-name>`` is made | |
BigQuery-dataset-name compatible (for example, most special characters | |
become underscores), and timestamp is in YYYY_MM_DDThh_mm_ss_sssZ | |
"based on ISO-8601" format. In the dataset two tables will be created, | |
``predictions``, and ``errors``. If the Model has both ``instance`` | |
and ``prediction`` schemata defined then the tables have columns as | |
follows: The ``predictions`` table contains instances for which the | |
prediction succeeded, it has columns as per a concatenation of the | |
Model''s instance and prediction schemata. The ``errors`` table | |
contains rows for which the prediction has failed, it has instance | |
columns, as per the instance schema, followed by a single "errors" | |
column, which as values has ``google.rpc.Status`` | |
represented as a STRUCT, and containing only ``code`` and | |
``message``. For more details about this output config, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' | |
isOptional: true | |
parameterType: STRING | |
bigquery_source_input_uri: | |
defaultValue: '' | |
description: 'BigQuery URI to a table, up to 2000 characters long. For example: | |
`projectId.bqDatasetId.bqTableId` For more details about this input | |
config, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' | |
isOptional: true | |
parameterType: STRING | |
encryption_spec_key_name: | |
defaultValue: '' | |
description: 'Customer-managed encryption | |
key options for a BatchPredictionJob. If this is set, then all | |
resources created by the BatchPredictionJob will be encrypted with the | |
provided encryption key. Has the form: | |
``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. | |
The key needs to be in the same region as where the compute resource | |
is created.' | |
isOptional: true | |
parameterType: STRING | |
excluded_fields: | |
defaultValue: [] | |
description: 'Fields that will be excluded in the prediction instance that | |
is | |
sent to the Model. | |
Excluded fields will be attached to the batch prediction output if | |
[key_field][] is not specified. | |
When excluded_fields is populated, [included_fields][] must be empty. | |
The input must be JSONL with objects at each line, CSV, BigQuery | |
or TfRecord.' | |
isOptional: true | |
parameterType: LIST | |
explanation_metadata: | |
defaultValue: {} | |
description: 'Explanation metadata | |
configuration for this BatchPredictionJob. Can be specified only if | |
`generate_explanation` is set to `True`. This value overrides the | |
value of `Model.explanation_metadata`. All fields of | |
`explanation_metadata` are optional in the request. If a field of the | |
`explanation_metadata` object is not populated, the corresponding | |
field of the `Model.explanation_metadata` object is inherited. For | |
more details, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' | |
isOptional: true | |
parameterType: STRUCT | |
explanation_parameters: | |
defaultValue: {} | |
description: 'Parameters to configure | |
explaining for Model''s predictions. Can be specified only if | |
`generate_explanation` is set to `True`. This value overrides the | |
value of `Model.explanation_parameters`. All fields of | |
`explanation_parameters` are optional in the request. If a field of | |
the `explanation_parameters` object is not populated, the | |
corresponding field of the `Model.explanation_parameters` object is | |
inherited. For more details, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' | |
isOptional: true | |
parameterType: STRUCT | |
gcs_destination_output_uri_prefix: | |
defaultValue: '' | |
description: 'The Google Cloud | |
Storage location of the directory where the output is to be written | |
to. In the given directory a new directory is created. Its name is | |
``prediction-<model-display-name>-<job-create-time>``, where timestamp | |
is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files | |
``predictions_0001.<extension>``, ``predictions_0002.<extension>``, | |
..., ``predictions_N.<extension>`` are created where ``<extension>`` | |
depends on chosen ``predictions_format``, and N may equal 0001 and | |
depends on the total number of successfully predicted instances. If | |
the Model has both ``instance`` and ``prediction`` schemata defined | |
then each such file contains predictions as per the | |
``predictions_format``. If prediction for any instance failed | |
(partially or completely), then an additional | |
``errors_0001.<extension>``, ``errors_0002.<extension>``,..., | |
``errors_N.<extension>`` files are created (N depends on total number | |
of failed predictions). These files contain the failed instances, as | |
per their schema, followed by an additional ``error`` field which as | |
value has ``google.rpc.Status`` containing only ``code`` and | |
``message`` fields. For more details about this output config, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' | |
isOptional: true | |
parameterType: STRING | |
gcs_source_uris: | |
defaultValue: [] | |
description: "Google Cloud Storage URI(-s) to your instances to run batch\ | |
\ prediction\non. They must match `instances_format`. May contain wildcards.\ | |
\ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ | |
For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." | |
isOptional: true | |
parameterType: LIST | |
generate_explanation: | |
defaultValue: false | |
description: 'Generate explanation along with | |
the batch prediction results. This will cause the batch prediction | |
output to include explanations based on the `predictions_format`: - | |
`bigquery`: output includes a column named `explanation`. The value is | |
a struct that conforms to the [aiplatform.gapic.Explanation] object. - | |
`jsonl`: The JSON objects on each line include an additional entry | |
keyed `explanation`. The value of the entry is a JSON object that | |
conforms to the [aiplatform.gapic.Explanation] object. - `csv`: | |
Generating explanations for CSV format is not supported. If this | |
field is set to true, either the Model.explanation_spec or | |
explanation_metadata and explanation_parameters must be populated.' | |
isOptional: true | |
parameterType: BOOLEAN | |
included_fields: | |
defaultValue: [] | |
description: 'Fields that will be included in the prediction instance that | |
is | |
sent to the Model. | |
If [instance_type][] is `array`, the order of field names in | |
included_fields also determines the order of the values in the array. | |
When included_fields is populated, [excluded_fields][] must be empty. | |
The input must be JSONL with objects at each line, CSV, BigQuery | |
or TfRecord.' | |
isOptional: true | |
parameterType: LIST | |
instance_type: | |
defaultValue: '' | |
description: "The format of the instance that the Model accepts. Vertex\ | |
\ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ | |
to the specified format.\nSupported values are:\n** `object`: Each input\ | |
\ is converted to JSON object format.\n* For `bigquery`, each row is converted\ | |
\ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ | |
\ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ | |
** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ | |
\ each row is converted to an array. The order\n of columns is determined\ | |
\ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ | |
\ [included_fields][] must be populated for specifying field orders.\n\ | |
* For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ | |
\ must be populated for specifying field orders.\n* Does not apply to\ | |
\ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ | |
\ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ | |
\ and `csv`, the behavior is the same as `array`. The\n order of columns\ | |
\ is the same as defined in the file or table, unless\n [included_fields][]\ | |
\ is populated.\n * For `jsonl`, the prediction instance format is determined\ | |
\ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ | |
\ each record will be converted to\n an object in the format of `{\"\ | |
b64\": <value>}`, where `<value>` is\n the Base64-encoded string of\ | |
\ the content of the record.\n * For `file-list`, each file in the list\ | |
\ will be converted to an\n object in the format of `{\"b64\": <value>}`,\ | |
\ where `<value>` is\n the Base64-encoded string of the content of the\ | |
\ file." | |
isOptional: true | |
parameterType: STRING | |
instances_format: | |
defaultValue: jsonl | |
description: "The format in which instances are\ngiven, must be one of the\ | |
\ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ | |
. For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." | |
isOptional: true | |
parameterType: STRING | |
job_display_name: | |
description: The user-defined name of this BatchPredictionJob. | |
parameterType: STRING | |
key_field: | |
defaultValue: '' | |
description: "The name of the field that is considered as a key.\nThe values\ | |
\ identified by the key field is not included in the\ntransformed instances\ | |
\ that is sent to the Model. This is similar to\nspecifying this name\ | |
\ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ | |
\ output will not include the instances. Instead the\noutput will only\ | |
\ include the value of the key field, in a field named\n`key` in the output:\n\ | |
\ * For `jsonl` output format, the output will have a `key` field\n \ | |
\ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ | |
\ the output will have have a `key`\n column instead of the instance\ | |
\ feature columns.\nThe input must be JSONL with objects at each line,\ | |
\ CSV, BigQuery\nor TfRecord." | |
isOptional: true | |
parameterType: STRING | |
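# Illustrative sketch with a hypothetical column name: setting
#   key_field: 'transaction_id'
# makes each output row carry a `key` field/column instead of the full instance.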
labels: | |
defaultValue: {} | |
description: 'The labels with user-defined metadata to | |
organize your BatchPredictionJobs. Label keys and values can be no | |
longer than 64 characters (Unicode codepoints), can only contain | |
lowercase letters, numeric characters, underscores and dashes. | |
International characters are allowed. See https://goo.gl/xmQnxf for | |
more information and examples of labels.' | |
isOptional: true | |
parameterType: STRUCT | |
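# Illustrative sketch with hypothetical labels (lowercase, at most 64 characters each):
#   labels: {team: 'tabular-ml', env: 'dev'}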
location: | |
defaultValue: us-central1 | |
description: 'Location for creating the BatchPredictionJob. | |
If not set, defaults to us-central1.' | |
isOptional: true | |
parameterType: STRING | |
machine_type: | |
defaultValue: '' | |
description: 'The type of machine for running batch | |
prediction on dedicated resources. If the Model supports | |
DEDICATED_RESOURCES this config may be provided (and the job will use | |
these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, | |
this config must be provided. For more details about the | |
BatchDedicatedResources, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. | |
For more details about the machine spec, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' | |
isOptional: true | |
parameterType: STRING | |
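# Illustrative sketch of a dedicated-resources setup (example values, not the
# component defaults):
#   machine_type: 'n1-standard-16'
#   starting_replica_count: 1
#   max_replica_count: 10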
manual_batch_tuning_parameters_batch_size: | |
defaultValue: 0.0 | |
description: 'The number of records (e.g. instances) of the operation given | |
in each batch to a machine replica. The machine type and the size of a single | |
record should be considered when setting this parameter; a higher value | |
speeds up the batch operation''s execution, but a value that is too high will | |
result in a whole batch not fitting in a machine''s memory, and the whole | |
operation will fail. The default value is 4.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
max_replica_count: | |
defaultValue: 0.0 | |
description: 'The maximum number of machine replicas the batch operation | |
may be scaled | |
to. Only used if `machine_type` is set. Default is 10.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
model_parameters: | |
defaultValue: {} | |
description: 'The parameters that govern the predictions. The schema of the | |
parameters may be specified via the Model''s `parameters_schema_uri`.' | |
isOptional: true | |
parameterType: STRUCT | |
predictions_format: | |
defaultValue: jsonl | |
description: "The format in which Vertex AI gives the predictions. Must\ | |
\ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ | |
\ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." | |
isOptional: true | |
parameterType: STRING | |
project: | |
description: Project to create the BatchPredictionJob. | |
parameterType: STRING | |
starting_replica_count: | |
defaultValue: 0.0 | |
description: 'The number of machine replicas | |
used at the start of the batch operation. If not set, Vertex AI | |
decides starting number, not greater than `max_replica_count`. Only | |
used if `machine_type` is set.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
outputDefinitions: | |
artifacts: | |
batchpredictionjob: | |
artifactType: | |
schemaTitle: google.VertexBatchPredictionJob | |
schemaVersion: 0.0.1 | |
description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table | |
instead.**] Artifact | |
representation of the created batch prediction job.' | |
bigquery_output_table: | |
artifactType: | |
schemaTitle: google.BQTable | |
schemaVersion: 0.0.1 | |
description: 'Artifact tracking the batch prediction job output. This is | |
only | |
available if | |
bigquery_output_table is specified.' | |
gcs_output_directory: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: 'Artifact tracking the batch prediction job output. This is | |
only | |
available if | |
gcs_destination_output_uri_prefix is specified.' | |
parameters: | |
gcp_resources: | |
description: 'Serialized gcp_resources proto tracking the batch prediction | |
job. | |
For more details, see | |
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' | |
parameterType: STRING | |
comp-model-batch-predict-4: | |
executorLabel: exec-model-batch-predict-4 | |
inputDefinitions: | |
artifacts: | |
model: | |
artifactType: | |
schemaTitle: google.VertexModel | |
schemaVersion: 0.0.1 | |
description: 'The Model used to get predictions via this job. Must share | |
the same | |
ancestor Location. Starting this job has no impact on any existing | |
deployments of the Model and their resources. Either this or | |
unmanaged_container_model must be specified.' | |
isOptional: true | |
unmanaged_container_model: | |
artifactType: | |
schemaTitle: google.UnmanagedContainerModel | |
schemaVersion: 0.0.1 | |
description: 'The unmanaged container model used to get predictions via | |
this job. | |
This should be used for models that are not uploaded to Vertex. Either | |
this or model must be specified.' | |
isOptional: true | |
parameters: | |
accelerator_count: | |
defaultValue: 0.0 | |
description: 'The number of accelerators to attach | |
to the `machine_type`. Only used if `machine_type` is set. For more | |
details about the machine spec, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
accelerator_type: | |
defaultValue: '' | |
description: 'The type of accelerator(s) that may be | |
attached to the machine as per `accelerator_count`. Only used if | |
`machine_type` is set. For more details about the machine spec, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' | |
isOptional: true | |
parameterType: STRING | |
bigquery_destination_output_uri: | |
defaultValue: '' | |
description: 'The BigQuery project location where the output is to be written | |
to. In | |
the given project a new dataset is created with name | |
``prediction_<model-display-name>_<job-create-time>``, where | |
``<model-display-name>`` is made BigQuery-dataset-name compatible (for | |
example, most special characters become underscores), and the timestamp is | |
in YYYY_MM_DDThh_mm_ss_sssZ | |
"based on ISO-8601" format. In the dataset two tables will be created, | |
``predictions``, and ``errors``. If the Model has both ``instance`` | |
and ``prediction`` schemata defined then the tables have columns as | |
follows: The ``predictions`` table contains instances for which the | |
prediction succeeded, it has columns as per a concatenation of the | |
Model''s instance and prediction schemata. The ``errors`` table | |
contains rows for which the prediction has failed, it has instance | |
columns, as per the instance schema, followed by a single "errors" | |
column, whose values are ``google.rpc.Status`` | |
represented as a STRUCT, containing only ``code`` and | |
``message``. For more details about this output config, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' | |
isOptional: true | |
parameterType: STRING | |
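# Illustrative sketch with a hypothetical project: setting
#   bigquery_destination_output_uri: 'bq://my-project'
# would create a dataset named prediction_<model-display-name>_<job-create-time>
# in that project, containing `predictions` and `errors` tables as described above.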
bigquery_source_input_uri: | |
defaultValue: '' | |
description: 'BigQuery URI to a table, up to 2000 characters long. For example: | |
`projectId.bqDatasetId.bqTableId` For more details about this input | |
config, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' | |
isOptional: true | |
parameterType: STRING | |
encryption_spec_key_name: | |
defaultValue: '' | |
description: 'Customer-managed encryption | |
key options for a BatchPredictionJob. If this is set, then all | |
resources created by the BatchPredictionJob will be encrypted with the | |
provided encryption key. Has the form: | |
``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. | |
The key needs to be in the same region as where the compute resource | |
is created.' | |
isOptional: true | |
parameterType: STRING | |
excluded_fields: | |
defaultValue: [] | |
description: 'Fields that will be excluded in the prediction instance that is | |
sent to the Model. The excluded fields will still be attached to the batch | |
prediction output if [key_field][] is not specified. When excluded_fields is | |
populated, [included_fields][] must be empty. The input must be JSONL with | |
objects at each line, CSV, BigQuery or TfRecord.' | |
isOptional: true | |
parameterType: LIST | |
explanation_metadata: | |
defaultValue: {} | |
description: 'Explanation metadata | |
configuration for this BatchPredictionJob. Can be specified only if | |
`generate_explanation` is set to `True`. This value overrides the | |
value of `Model.explanation_metadata`. All fields of | |
`explanation_metadata` are optional in the request. If a field of the | |
`explanation_metadata` object is not populated, the corresponding | |
field of the `Model.explanation_metadata` object is inherited. For | |
more details, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' | |
isOptional: true | |
parameterType: STRUCT | |
explanation_parameters: | |
defaultValue: {} | |
description: 'Parameters to configure | |
explaining for Model''s predictions. Can be specified only if | |
`generate_explanation` is set to `True`. This value overrides the | |
value of `Model.explanation_parameters`. All fields of | |
`explanation_parameters` are optional in the request. If a field of | |
the `explanation_parameters` object is not populated, the | |
corresponding field of the `Model.explanation_parameters` object is | |
inherited. For more details, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' | |
isOptional: true | |
parameterType: STRUCT | |
gcs_destination_output_uri_prefix: | |
defaultValue: '' | |
description: 'The Google Cloud | |
Storage location of the directory where the output is to be written | |
to. In the given directory a new directory is created. Its name is | |
``prediction-<model-display-name>-<job-create-time>``, where timestamp | |
is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files | |
``predictions_0001.<extension>``, ``predictions_0002.<extension>``, | |
..., ``predictions_N.<extension>`` are created where ``<extension>`` | |
depends on chosen ``predictions_format``, and N may equal 0001 and | |
depends on the total number of successfully predicted instances. If | |
the Model has both ``instance`` and ``prediction`` schemata defined | |
then each such file contains predictions as per the | |
``predictions_format``. If prediction for any instance failed | |
(partially or completely), then an additional | |
``errors_0001.<extension>``, ``errors_0002.<extension>``,..., | |
``errors_N.<extension>`` files are created (N depends on total number | |
of failed predictions). These files contain the failed instances, as | |
per their schema, followed by an additional ``error`` field whose value | |
is a ``google.rpc.Status`` containing only ``code`` and | |
``message`` fields. For more details about this output config, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' | |
isOptional: true | |
parameterType: STRING | |
gcs_source_uris: | |
defaultValue: [] | |
description: "Google Cloud Storage URI(-s) to your instances to run batch\ | |
\ prediction\non. They must match `instances_format`. May contain wildcards.\ | |
\ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ | |
For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." | |
isOptional: true | |
parameterType: LIST | |
generate_explanation: | |
defaultValue: false | |
description: 'Generate explanation along with | |
the batch prediction results. This will cause the batch prediction | |
output to include explanations based on the `predictions_format`: - | |
`bigquery`: output includes a column named `explanation`. The value is | |
a struct that conforms to the [aiplatform.gapic.Explanation] object. - | |
`jsonl`: The JSON objects on each line include an additional entry | |
keyed `explanation`. The value of the entry is a JSON object that | |
conforms to the [aiplatform.gapic.Explanation] object. - `csv`: | |
Generating explanations for CSV format is not supported. If this | |
field is set to true, either the Model.explanation_spec or | |
explanation_metadata and explanation_parameters must be populated.' | |
isOptional: true | |
parameterType: BOOLEAN | |
included_fields: | |
defaultValue: [] | |
description: 'Fields that will be included in the prediction instance that | |
is | |
sent to the Model. | |
If [instance_type][] is `array`, the order of field names in | |
included_fields also determines the order of the values in the array. | |
When included_fields is populated, [excluded_fields][] must be empty. | |
The input must be JSONL with objects at each line, CSV, BigQuery | |
or TfRecord.' | |
isOptional: true | |
parameterType: LIST | |
instance_type: | |
defaultValue: '' | |
description: "The format of the instance that the Model accepts. Vertex\ | |
\ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ | |
to the specified format.\nSupported values are:\n** `object`: Each input\ | |
\ is converted to JSON object format.\n* For `bigquery`, each row is converted\ | |
\ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ | |
\ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ | |
** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ | |
\ each row is converted to an array. The order\n of columns is determined\ | |
\ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ | |
\ [included_fields][] must be populated for specifying field orders.\n\ | |
* For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ | |
\ must be populated for specifying field orders.\n* Does not apply to\ | |
\ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ | |
\ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ | |
\ and `csv`, the behavior is the same as `array`. The\n order of columns\ | |
\ is the same as defined in the file or table, unless\n [included_fields][]\ | |
\ is populated.\n * For `jsonl`, the prediction instance format is determined\ | |
\ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ | |
\ each record will be converted to\n an object in the format of `{\"\ | |
b64\": <value>}`, where `<value>` is\n the Base64-encoded string of\ | |
\ the content of the record.\n * For `file-list`, each file in the list\ | |
\ will be converted to an\n object in the format of `{\"b64\": <value>}`,\ | |
\ where `<value>` is\n the Base64-encoded string of the content of the\ | |
\ file." | |
isOptional: true | |
parameterType: STRING | |
instances_format: | |
defaultValue: jsonl | |
description: "The format in which instances are\ngiven, must be one of the\ | |
\ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ | |
. For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." | |
isOptional: true | |
parameterType: STRING | |
job_display_name: | |
description: The user-defined name of this BatchPredictionJob. | |
parameterType: STRING | |
key_field: | |
defaultValue: '' | |
description: "The name of the field that is considered as a key.\nThe values\ | |
\ identified by the key field is not included in the\ntransformed instances\ | |
\ that is sent to the Model. This is similar to\nspecifying this name\ | |
\ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ | |
\ output will not include the instances. Instead the\noutput will only\ | |
\ include the value of the key field, in a field named\n`key` in the output:\n\ | |
\ * For `jsonl` output format, the output will have a `key` field\n \ | |
\ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ | |
\ the output will have have a `key`\n column instead of the instance\ | |
\ feature columns.\nThe input must be JSONL with objects at each line,\ | |
\ CSV, BigQuery\nor TfRecord." | |
isOptional: true | |
parameterType: STRING | |
labels: | |
defaultValue: {} | |
description: 'The labels with user-defined metadata to | |
organize your BatchPredictionJobs. Label keys and values can be no | |
longer than 64 characters (Unicode codepoints), can only contain | |
lowercase letters, numeric characters, underscores and dashes. | |
International characters are allowed. See https://goo.gl/xmQnxf for | |
more information and examples of labels.' | |
isOptional: true | |
parameterType: STRUCT | |
location: | |
defaultValue: us-central1 | |
description: 'Location for creating the BatchPredictionJob. | |
If not set, defaults to us-central1.' | |
isOptional: true | |
parameterType: STRING | |
machine_type: | |
defaultValue: '' | |
description: 'The type of machine for running batch | |
prediction on dedicated resources. If the Model supports | |
DEDICATED_RESOURCES this config may be provided (and the job will use | |
these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, | |
this config must be provided. For more details about the | |
BatchDedicatedResources, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. | |
For more details about the machine spec, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' | |
isOptional: true | |
parameterType: STRING | |
manual_batch_tuning_parameters_batch_size: | |
defaultValue: 0.0 | |
description: 'The number of records (e.g. instances) of the operation given | |
in each batch to a machine replica. The machine type and the size of a single | |
record should be considered when setting this parameter; a higher value | |
speeds up the batch operation''s execution, but a value that is too high will | |
result in a whole batch not fitting in a machine''s memory, and the whole | |
operation will fail. The default value is 4.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
max_replica_count: | |
defaultValue: 0.0 | |
description: 'The maximum number of machine replicas the batch operation | |
may be scaled | |
to. Only used if `machine_type` is set. Default is 10.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
model_parameters: | |
defaultValue: {} | |
description: 'The parameters that govern the predictions. The schema of the | |
parameters may be specified via the Model''s `parameters_schema_uri`.' | |
isOptional: true | |
parameterType: STRUCT | |
predictions_format: | |
defaultValue: jsonl | |
description: "The format in which Vertex AI gives the predictions. Must\ | |
\ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ | |
\ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." | |
isOptional: true | |
parameterType: STRING | |
project: | |
description: Project to create the BatchPredictionJob. | |
parameterType: STRING | |
starting_replica_count: | |
defaultValue: 0.0 | |
description: 'The number of machine replicas | |
used at the start of the batch operation. If not set, Vertex AI | |
decides starting number, not greater than `max_replica_count`. Only | |
used if `machine_type` is set.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
outputDefinitions: | |
artifacts: | |
batchpredictionjob: | |
artifactType: | |
schemaTitle: google.VertexBatchPredictionJob | |
schemaVersion: 0.0.1 | |
description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table | |
instead.**] Artifact | |
representation of the created batch prediction job.' | |
bigquery_output_table: | |
artifactType: | |
schemaTitle: google.BQTable | |
schemaVersion: 0.0.1 | |
description: 'Artifact tracking the batch prediction job output. This is | |
only | |
available if | |
bigquery_output_table is specified.' | |
gcs_output_directory: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: 'Artifact tracking the batch prediction job output. This is | |
only | |
available if | |
gcs_destination_output_uri_prefix is specified.' | |
parameters: | |
gcp_resources: | |
description: 'Serialized gcp_resources proto tracking the batch prediction | |
job. | |
For more details, see | |
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' | |
parameterType: STRING | |
comp-model-batch-predict-5: | |
executorLabel: exec-model-batch-predict-5 | |
inputDefinitions: | |
artifacts: | |
model: | |
artifactType: | |
schemaTitle: google.VertexModel | |
schemaVersion: 0.0.1 | |
description: 'The Model used to get predictions via this job. Must share | |
the same | |
ancestor Location. Starting this job has no impact on any existing | |
deployments of the Model and their resources. Either this or | |
unmanaged_container_model must be specified.' | |
isOptional: true | |
unmanaged_container_model: | |
artifactType: | |
schemaTitle: google.UnmanagedContainerModel | |
schemaVersion: 0.0.1 | |
description: 'The unmanaged container model used to get predictions via | |
this job. | |
This should be used for models that are not uploaded to Vertex. Either | |
this or model must be specified.' | |
isOptional: true | |
parameters: | |
accelerator_count: | |
defaultValue: 0.0 | |
description: 'The number of accelerators to attach | |
to the `machine_type`. Only used if `machine_type` is set. For more | |
details about the machine spec, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
accelerator_type: | |
defaultValue: '' | |
description: 'The type of accelerator(s) that may be | |
attached to the machine as per `accelerator_count`. Only used if | |
`machine_type` is set. For more details about the machine spec, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' | |
isOptional: true | |
parameterType: STRING | |
bigquery_destination_output_uri: | |
defaultValue: '' | |
description: 'The BigQuery project location where the output is to be written | |
to. In | |
the given project a new dataset is created with name | |
``prediction_<model-display-name>_<job-create-time>``, where | |
``<model-display-name>`` is made BigQuery-dataset-name compatible (for | |
example, most special characters become underscores), and the timestamp is | |
in YYYY_MM_DDThh_mm_ss_sssZ | |
"based on ISO-8601" format. In the dataset two tables will be created, | |
``predictions``, and ``errors``. If the Model has both ``instance`` | |
and ``prediction`` schemata defined then the tables have columns as | |
follows: The ``predictions`` table contains instances for which the | |
prediction succeeded, it has columns as per a concatenation of the | |
Model''s instance and prediction schemata. The ``errors`` table | |
contains rows for which the prediction has failed, it has instance | |
columns, as per the instance schema, followed by a single "errors" | |
column, whose values are ``google.rpc.Status`` | |
represented as a STRUCT, containing only ``code`` and | |
``message``. For more details about this output config, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' | |
isOptional: true | |
parameterType: STRING | |
bigquery_source_input_uri: | |
defaultValue: '' | |
description: 'BigQuery URI to a table, up to 2000 characters long. For example: | |
`projectId.bqDatasetId.bqTableId` For more details about this input | |
config, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig.' | |
isOptional: true | |
parameterType: STRING | |
encryption_spec_key_name: | |
defaultValue: '' | |
description: 'Customer-managed encryption | |
key options for a BatchPredictionJob. If this is set, then all | |
resources created by the BatchPredictionJob will be encrypted with the | |
provided encryption key. Has the form: | |
``projects/my-project/locations/my-location/keyRings/my-kr/cryptoKeys/my-key``. | |
The key needs to be in the same region as where the compute resource | |
is created.' | |
isOptional: true | |
parameterType: STRING | |
excluded_fields: | |
defaultValue: [] | |
description: 'Fields that will be excluded in the prediction instance that is | |
sent to the Model. The excluded fields will still be attached to the batch | |
prediction output if [key_field][] is not specified. When excluded_fields is | |
populated, [included_fields][] must be empty. The input must be JSONL with | |
objects at each line, CSV, BigQuery or TfRecord.' | |
isOptional: true | |
parameterType: LIST | |
explanation_metadata: | |
defaultValue: {} | |
description: 'Explanation metadata | |
configuration for this BatchPredictionJob. Can be specified only if | |
`generate_explanation` is set to `True`. This value overrides the | |
value of `Model.explanation_metadata`. All fields of | |
`explanation_metadata` are optional in the request. If a field of the | |
`explanation_metadata` object is not populated, the corresponding | |
field of the `Model.explanation_metadata` object is inherited. For | |
more details, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#explanationmetadata.' | |
isOptional: true | |
parameterType: STRUCT | |
explanation_parameters: | |
defaultValue: {} | |
description: 'Parameters to configure | |
explaining for Model''s predictions. Can be specified only if | |
`generate_explanation` is set to `True`. This value overrides the | |
value of `Model.explanation_parameters`. All fields of | |
`explanation_parameters` are optional in the request. If a field of | |
the `explanation_parameters` object is not populated, the | |
corresponding field of the `Model.explanation_parameters` object is | |
inherited. For more details, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/ExplanationSpec#ExplanationParameters.' | |
isOptional: true | |
parameterType: STRUCT | |
gcs_destination_output_uri_prefix: | |
defaultValue: '' | |
description: 'The Google Cloud | |
Storage location of the directory where the output is to be written | |
to. In the given directory a new directory is created. Its name is | |
``prediction-<model-display-name>-<job-create-time>``, where timestamp | |
is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Inside of it files | |
``predictions_0001.<extension>``, ``predictions_0002.<extension>``, | |
..., ``predictions_N.<extension>`` are created where ``<extension>`` | |
depends on chosen ``predictions_format``, and N may equal 0001 and | |
depends on the total number of successfully predicted instances. If | |
the Model has both ``instance`` and ``prediction`` schemata defined | |
then each such file contains predictions as per the | |
``predictions_format``. If prediction for any instance failed | |
(partially or completely), then an additional | |
``errors_0001.<extension>``, ``errors_0002.<extension>``,..., | |
``errors_N.<extension>`` files are created (N depends on total number | |
of failed predictions). These files contain the failed instances, as | |
per their schema, followed by an additional ``error`` field whose value | |
is a ``google.rpc.Status`` containing only ``code`` and | |
``message`` fields. For more details about this output config, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig.' | |
isOptional: true | |
parameterType: STRING | |
gcs_source_uris: | |
defaultValue: [] | |
description: "Google Cloud Storage URI(-s) to your instances to run batch\ | |
\ prediction\non. They must match `instances_format`. May contain wildcards.\ | |
\ For more\ninformation on wildcards, see\n https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames.\n\ | |
For more details about this input config, see https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." | |
isOptional: true | |
parameterType: LIST | |
generate_explanation: | |
defaultValue: false | |
description: 'Generate explanation along with | |
the batch prediction results. This will cause the batch prediction | |
output to include explanations based on the `predictions_format`: - | |
`bigquery`: output includes a column named `explanation`. The value is | |
a struct that conforms to the [aiplatform.gapic.Explanation] object. - | |
`jsonl`: The JSON objects on each line include an additional entry | |
keyed `explanation`. The value of the entry is a JSON object that | |
conforms to the [aiplatform.gapic.Explanation] object. - `csv`: | |
Generating explanations for CSV format is not supported. If this | |
field is set to true, either the Model.explanation_spec or | |
explanation_metadata and explanation_parameters must be populated.' | |
isOptional: true | |
parameterType: BOOLEAN | |
included_fields: | |
defaultValue: [] | |
description: 'Fields that will be included in the prediction instance that | |
is | |
sent to the Model. | |
If [instance_type][] is `array`, the order of field names in | |
included_fields also determines the order of the values in the array. | |
When included_fields is populated, [excluded_fields][] must be empty. | |
The input must be JSONL with objects at each line, CSV, BigQuery | |
or TfRecord.' | |
isOptional: true | |
parameterType: LIST | |
instance_type: | |
defaultValue: '' | |
description: "The format of the instance that the Model accepts. Vertex\ | |
\ AI will\nconvert compatible\n[batch prediction input instance formats][InputConfig.instances_format]\n\ | |
to the specified format.\nSupported values are:\n** `object`: Each input\ | |
\ is converted to JSON object format.\n* For `bigquery`, each row is converted\ | |
\ to an object.\n* For `jsonl`, each line of the JSONL input must be an\ | |
\ object.\n* Does not apply to `csv`, `file-list`, `tf-record`, or\n`tf-record-gzip`.\n\ | |
** `array`: Each input is converted to JSON array format.\n* For `bigquery`,\ | |
\ each row is converted to an array. The order\n of columns is determined\ | |
\ by the BigQuery column order, unless\n [included_fields][] is populated.\n\ | |
\ [included_fields][] must be populated for specifying field orders.\n\ | |
* For `jsonl`, if each line of the JSONL input is an object,\n [included_fields][]\ | |
\ must be populated for specifying field orders.\n* Does not apply to\ | |
\ `csv`, `file-list`, `tf-record`, or\n `tf-record-gzip`.\nIf not specified,\ | |
\ Vertex AI converts the batch prediction input as\nfollows:\n * For `bigquery`\ | |
\ and `csv`, the behavior is the same as `array`. The\n order of columns\ | |
\ is the same as defined in the file or table, unless\n [included_fields][]\ | |
\ is populated.\n * For `jsonl`, the prediction instance format is determined\ | |
\ by\n each line of the input.\n * For `tf-record`/`tf-record-gzip`,\ | |
\ each record will be converted to\n an object in the format of `{\"\ | |
b64\": <value>}`, where `<value>` is\n the Base64-encoded string of\ | |
\ the content of the record.\n * For `file-list`, each file in the list\ | |
\ will be converted to an\n object in the format of `{\"b64\": <value>}`,\ | |
\ where `<value>` is\n the Base64-encoded string of the content of the\ | |
\ file." | |
isOptional: true | |
parameterType: STRING | |
instances_format: | |
defaultValue: jsonl | |
description: "The format in which instances are\ngiven, must be one of the\ | |
\ Model's supportedInputStorageFormats. If not\nset, default to \"jsonl\"\ | |
. For more details about this input config,\nsee\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#InputConfig." | |
isOptional: true | |
parameterType: STRING | |
job_display_name: | |
description: The user-defined name of this BatchPredictionJob. | |
parameterType: STRING | |
key_field: | |
defaultValue: '' | |
description: "The name of the field that is considered as a key.\nThe values\ | |
\ identified by the key field is not included in the\ntransformed instances\ | |
\ that is sent to the Model. This is similar to\nspecifying this name\ | |
\ of the field in [excluded_fields][]. In addition,\nthe batch prediction\ | |
\ output will not include the instances. Instead the\noutput will only\ | |
\ include the value of the key field, in a field named\n`key` in the output:\n\ | |
\ * For `jsonl` output format, the output will have a `key` field\n \ | |
\ instead of the `instance` field.\n * For `csv`/`bigquery` output format,\ | |
\ the output will have have a `key`\n column instead of the instance\ | |
\ feature columns.\nThe input must be JSONL with objects at each line,\ | |
\ CSV, BigQuery\nor TfRecord." | |
isOptional: true | |
parameterType: STRING | |
labels: | |
defaultValue: {} | |
description: 'The labels with user-defined metadata to | |
organize your BatchPredictionJobs. Label keys and values can be no | |
longer than 64 characters (Unicode codepoints), can only contain | |
lowercase letters, numeric characters, underscores and dashes. | |
International characters are allowed. See https://goo.gl/xmQnxf for | |
more information and examples of labels.' | |
isOptional: true | |
parameterType: STRUCT | |
location: | |
defaultValue: us-central1 | |
description: 'Location for creating the BatchPredictionJob. | |
If not set, defaults to us-central1.' | |
isOptional: true | |
parameterType: STRING | |
machine_type: | |
defaultValue: '' | |
description: 'The type of machine for running batch | |
prediction on dedicated resources. If the Model supports | |
DEDICATED_RESOURCES this config may be provided (and the job will use | |
these resources). If the Model doesn''t support AUTOMATIC_RESOURCES, | |
this config must be provided. For more details about the | |
BatchDedicatedResources, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#BatchDedicatedResources. | |
For more details about the machine spec, see | |
https://cloud.google.com/vertex-ai/docs/reference/rest/v1/MachineSpec' | |
isOptional: true | |
parameterType: STRING | |
manual_batch_tuning_parameters_batch_size: | |
defaultValue: 0.0 | |
description: 'The number of records (e.g. instances) of the operation given | |
in each batch to a machine replica. The machine type and the size of a single | |
record should be considered when setting this parameter; a higher value | |
speeds up the batch operation''s execution, but a value that is too high will | |
result in a whole batch not fitting in a machine''s memory, and the whole | |
operation will fail. The default value is 4.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
max_replica_count: | |
defaultValue: 0.0 | |
description: 'The maximum number of machine replicas the batch operation | |
may be scaled | |
to. Only used if `machine_type` is set. Default is 10.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
model_parameters: | |
defaultValue: {} | |
description: 'The parameters that govern the predictions. The schema of the | |
parameters may be specified via the Model''s `parameters_schema_uri`.' | |
isOptional: true | |
parameterType: STRUCT | |
predictions_format: | |
defaultValue: jsonl | |
description: "The format in which Vertex AI gives the predictions. Must\ | |
\ be one of the\nModel's supportedOutputStorageFormats. If not set, default\ | |
\ to \"jsonl\".\nFor more details about this output config, see\n https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.batchPredictionJobs#OutputConfig." | |
isOptional: true | |
parameterType: STRING | |
project: | |
description: Project to create the BatchPredictionJob. | |
parameterType: STRING | |
starting_replica_count: | |
defaultValue: 0.0 | |
description: 'The number of machine replicas | |
used at the start of the batch operation. If not set, Vertex AI | |
decides starting number, not greater than `max_replica_count`. Only | |
used if `machine_type` is set.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
outputDefinitions: | |
artifacts: | |
batchpredictionjob: | |
artifactType: | |
schemaTitle: google.VertexBatchPredictionJob | |
schemaVersion: 0.0.1 | |
description: '[**Deprecated. Use gcs_output_directory and bigquery_output_table | |
instead.**] Artifact | |
representation of the created batch prediction job.' | |
bigquery_output_table: | |
artifactType: | |
schemaTitle: google.BQTable | |
schemaVersion: 0.0.1 | |
description: 'Artifact tracking the batch prediction job output. This is | |
only | |
available if | |
bigquery_output_table is specified.' | |
gcs_output_directory: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: 'Artifact tracking the batch prediction job output. This is | |
only | |
available if | |
gcs_destination_output_uri_prefix is specified.' | |
parameters: | |
gcp_resources: | |
description: 'Serialized gcp_resources proto tracking the batch prediction | |
job. | |
For more details, see | |
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' | |
parameterType: STRING | |
comp-model-evaluation: | |
executorLabel: exec-model-evaluation | |
inputDefinitions: | |
artifacts: | |
batch_prediction_job: | |
artifactType: | |
schemaTitle: google.VertexBatchPredictionJob | |
schemaVersion: 0.0.1 | |
parameters: | |
dataflow_disk_size: | |
defaultValue: 50.0 | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
dataflow_machine_type: | |
defaultValue: n1-standard-4 | |
isOptional: true | |
parameterType: STRING | |
dataflow_max_workers_num: | |
defaultValue: 100.0 | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
dataflow_service_account: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
dataflow_subnetwork: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
dataflow_use_public_ips: | |
defaultValue: true | |
isOptional: true | |
parameterType: BOOLEAN | |
dataflow_workers_num: | |
defaultValue: 10.0 | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
encryption_spec_key_name: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
example_weight_column: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
ground_truth_column: | |
parameterType: STRING | |
ground_truth_format: | |
defaultValue: jsonl | |
isOptional: true | |
parameterType: STRING | |
location: | |
defaultValue: us-central1 | |
isOptional: true | |
parameterType: STRING | |
prediction_id_column: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
prediction_label_column: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
prediction_score_column: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
predictions_format: | |
defaultValue: jsonl | |
isOptional: true | |
parameterType: STRING | |
problem_type: | |
parameterType: STRING | |
project: | |
parameterType: STRING | |
root_dir: | |
parameterType: STRING | |
outputDefinitions: | |
artifacts: | |
evaluation_metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
parameters: | |
gcp_resources: | |
parameterType: STRING | |
comp-model-evaluation-2: | |
executorLabel: exec-model-evaluation-2 | |
inputDefinitions: | |
artifacts: | |
batch_prediction_job: | |
artifactType: | |
schemaTitle: google.VertexBatchPredictionJob | |
schemaVersion: 0.0.1 | |
parameters: | |
dataflow_disk_size: | |
defaultValue: 50.0 | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
dataflow_machine_type: | |
defaultValue: n1-standard-4 | |
isOptional: true | |
parameterType: STRING | |
dataflow_max_workers_num: | |
defaultValue: 100.0 | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
dataflow_service_account: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
dataflow_subnetwork: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
dataflow_use_public_ips: | |
defaultValue: true | |
isOptional: true | |
parameterType: BOOLEAN | |
dataflow_workers_num: | |
defaultValue: 10.0 | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
encryption_spec_key_name: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
example_weight_column: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
ground_truth_column: | |
parameterType: STRING | |
ground_truth_format: | |
defaultValue: jsonl | |
isOptional: true | |
parameterType: STRING | |
location: | |
defaultValue: us-central1 | |
isOptional: true | |
parameterType: STRING | |
prediction_id_column: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
prediction_label_column: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
prediction_score_column: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
predictions_format: | |
defaultValue: jsonl | |
isOptional: true | |
parameterType: STRING | |
problem_type: | |
parameterType: STRING | |
project: | |
parameterType: STRING | |
root_dir: | |
parameterType: STRING | |
outputDefinitions: | |
artifacts: | |
evaluation_metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
parameters: | |
gcp_resources: | |
parameterType: STRING | |
comp-model-evaluation-3: | |
executorLabel: exec-model-evaluation-3 | |
inputDefinitions: | |
artifacts: | |
batch_prediction_job: | |
artifactType: | |
schemaTitle: google.VertexBatchPredictionJob | |
schemaVersion: 0.0.1 | |
parameters: | |
dataflow_disk_size: | |
defaultValue: 50.0 | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
dataflow_machine_type: | |
defaultValue: n1-standard-4 | |
isOptional: true | |
parameterType: STRING | |
dataflow_max_workers_num: | |
defaultValue: 100.0 | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
dataflow_service_account: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
dataflow_subnetwork: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
dataflow_use_public_ips: | |
defaultValue: true | |
isOptional: true | |
parameterType: BOOLEAN | |
dataflow_workers_num: | |
defaultValue: 10.0 | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
encryption_spec_key_name: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
example_weight_column: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
ground_truth_column: | |
parameterType: STRING | |
ground_truth_format: | |
defaultValue: jsonl | |
isOptional: true | |
parameterType: STRING | |
location: | |
defaultValue: us-central1 | |
isOptional: true | |
parameterType: STRING | |
prediction_id_column: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
prediction_label_column: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
prediction_score_column: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
predictions_format: | |
defaultValue: jsonl | |
isOptional: true | |
parameterType: STRING | |
problem_type: | |
parameterType: STRING | |
project: | |
parameterType: STRING | |
root_dir: | |
parameterType: STRING | |
outputDefinitions: | |
artifacts: | |
evaluation_metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
parameters: | |
gcp_resources: | |
parameterType: STRING | |
comp-model-evaluation-import: | |
executorLabel: exec-model-evaluation-import | |
inputDefinitions: | |
artifacts: | |
classification_metrics: | |
artifactType: | |
schemaTitle: google.ClassificationMetrics | |
schemaVersion: 0.0.1 | |
description: 'Path of classification metrics generated from the | |
classification evaluation component.' | |
isOptional: true | |
explanation: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
description: 'Path for model explanation metrics generated from an evaluation | |
component.' | |
isOptional: true | |
feature_attributions: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
description: 'The feature attributions metrics artifact generated | |
from the feature attribution component.' | |
isOptional: true | |
forecasting_metrics: | |
artifactType: | |
schemaTitle: google.ForecastingMetrics | |
schemaVersion: 0.0.1 | |
description: 'Path of forecasting metrics generated from the | |
forecasting evaluation component.' | |
isOptional: true | |
metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
description: Path of metrics generated from an evaluation component. | |
isOptional: true | |
model: | |
artifactType: | |
schemaTitle: google.VertexModel | |
schemaVersion: 0.0.1 | |
description: 'Vertex model resource that will be the parent resource of | |
the | |
uploaded evaluation.' | |
question_answering_metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
isOptional: true | |
regression_metrics: | |
artifactType: | |
schemaTitle: google.RegressionMetrics | |
schemaVersion: 0.0.1 | |
description: 'Path of regression metrics generated from the regression | |
evaluation component.' | |
isOptional: true | |
summarization_metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
isOptional: true | |
text_generation_metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
isOptional: true | |
parameters: | |
dataset_path: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
dataset_paths: | |
defaultValue: [] | |
isOptional: true | |
parameterType: LIST | |
dataset_type: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
display_name: | |
defaultValue: '' | |
description: The display name for the uploaded model evaluation resource. | |
isOptional: true | |
parameterType: STRING | |
problem_type: | |
description: 'The problem type of the metrics being imported to the | |
VertexModel. `classification`, `regression`, and `forecasting` are the | |
currently supported problem types. Must be provided when `metrics` is | |
provided.' | |
isOptional: true | |
parameterType: STRING | |
outputDefinitions: | |
parameters: | |
gcp_resources: | |
parameterType: STRING | |
comp-model-evaluation-import-2: | |
executorLabel: exec-model-evaluation-import-2 | |
inputDefinitions: | |
artifacts: | |
classification_metrics: | |
artifactType: | |
schemaTitle: google.ClassificationMetrics | |
schemaVersion: 0.0.1 | |
description: 'Path of classification metrics generated from the | |
classification evaluation component.' | |
isOptional: true | |
explanation: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
description: 'Path for model explanation metrics generated from an evaluation | |
component.' | |
isOptional: true | |
feature_attributions: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
description: 'The feature attributions metrics artifact generated | |
from the feature attribution component.' | |
isOptional: true | |
forecasting_metrics: | |
artifactType: | |
schemaTitle: google.ForecastingMetrics | |
schemaVersion: 0.0.1 | |
description: 'Path of forecasting metrics generated from the | |
forecasting evaluation component.' | |
isOptional: true | |
metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
description: Path of metrics generated from an evaluation component. | |
isOptional: true | |
model: | |
artifactType: | |
schemaTitle: google.VertexModel | |
schemaVersion: 0.0.1 | |
description: 'Vertex model resource that will be the parent resource of | |
the | |
uploaded evaluation.' | |
question_answering_metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
isOptional: true | |
regression_metrics: | |
artifactType: | |
schemaTitle: google.RegressionMetrics | |
schemaVersion: 0.0.1 | |
description: 'Path of regression metrics generated from the regression | |
evaluation component.' | |
isOptional: true | |
summarization_metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
isOptional: true | |
text_generation_metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
isOptional: true | |
parameters: | |
dataset_path: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
dataset_paths: | |
defaultValue: [] | |
isOptional: true | |
parameterType: LIST | |
dataset_type: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
display_name: | |
defaultValue: '' | |
description: The display name for the uploaded model evaluation resource. | |
isOptional: true | |
parameterType: STRING | |
problem_type: | |
description: 'The problem type of the metrics being imported to the | |
VertexModel. `classification`, `regression`, and `forecasting` are the | |
currently supported problem types. Must be provided when `metrics` is | |
provided.' | |
isOptional: true | |
parameterType: STRING | |
outputDefinitions: | |
parameters: | |
gcp_resources: | |
parameterType: STRING | |
comp-model-evaluation-import-3: | |
executorLabel: exec-model-evaluation-import-3 | |
inputDefinitions: | |
artifacts: | |
classification_metrics: | |
artifactType: | |
schemaTitle: google.ClassificationMetrics | |
schemaVersion: 0.0.1 | |
description: 'Path of classification metrics generated from the | |
classification evaluation component.' | |
isOptional: true | |
explanation: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
description: 'Path for model explanation metrics generated from an evaluation | |
component.' | |
isOptional: true | |
feature_attributions: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
description: 'The feature attributions metrics artifact generated | |
from the feature attribution component.' | |
isOptional: true | |
forecasting_metrics: | |
artifactType: | |
schemaTitle: google.ForecastingMetrics | |
schemaVersion: 0.0.1 | |
description: 'Path of forecasting metrics generated from the | |
forecasting evaluation component.' | |
isOptional: true | |
metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
description: Path of metrics generated from an evaluation component. | |
isOptional: true | |
model: | |
artifactType: | |
schemaTitle: google.VertexModel | |
schemaVersion: 0.0.1 | |
description: 'Vertex model resource that will be the parent resource of | |
the | |
uploaded evaluation.' | |
question_answering_metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
isOptional: true | |
regression_metrics: | |
artifactType: | |
schemaTitle: google.RegressionMetrics | |
schemaVersion: 0.0.1 | |
description: 'Path of regression metrics generated from the regression | |
evaluation component.' | |
isOptional: true | |
summarization_metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
isOptional: true | |
text_generation_metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
isOptional: true | |
parameters: | |
dataset_path: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
dataset_paths: | |
defaultValue: [] | |
isOptional: true | |
parameterType: LIST | |
dataset_type: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
display_name: | |
defaultValue: '' | |
description: The display name for the uploaded model evaluation resource. | |
isOptional: true | |
parameterType: STRING | |
problem_type: | |
description: 'The problem type of the metrics being imported to the | |
VertexModel. `classification`, `regression`, and `forecasting` are the | |
currently supported problem types. Must be provided when `metrics` is | |
provided.' | |
isOptional: true | |
parameterType: STRING | |
outputDefinitions: | |
parameters: | |
gcp_resources: | |
parameterType: STRING | |
comp-model-upload: | |
executorLabel: exec-model-upload | |
inputDefinitions: | |
artifacts: | |
explanation_metadata_artifact: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
isOptional: true | |
parent_model: | |
artifactType: | |
schemaTitle: google.VertexModel | |
schemaVersion: 0.0.1 | |
isOptional: true | |
unmanaged_container_model: | |
artifactType: | |
schemaTitle: google.UnmanagedContainerModel | |
schemaVersion: 0.0.1 | |
isOptional: true | |
parameters: | |
description: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
display_name: | |
parameterType: STRING | |
encryption_spec_key_name: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
explanation_metadata: | |
defaultValue: {} | |
isOptional: true | |
parameterType: STRUCT | |
explanation_parameters: | |
defaultValue: {} | |
isOptional: true | |
parameterType: STRUCT | |
labels: | |
defaultValue: {} | |
isOptional: true | |
parameterType: STRUCT | |
location: | |
defaultValue: us-central1 | |
isOptional: true | |
parameterType: STRING | |
project: | |
parameterType: STRING | |
outputDefinitions: | |
artifacts: | |
model: | |
artifactType: | |
schemaTitle: google.VertexModel | |
schemaVersion: 0.0.1 | |
parameters: | |
gcp_resources: | |
parameterType: STRING | |
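# The `explanation_metadata` and `explanation_parameters` STRUCT inputs above feed the
# uploaded model's explanation spec. In this pipeline they are normally wired from the
# ensemble step's outputs; the sketch below is only an illustration of the general
# shape (the attribution settings are assumptions, not defaults of this pipeline):
#
#   explanation_parameters:
#     sampledShapleyAttribution:
#       pathCount: 10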
comp-model-upload-2: | |
executorLabel: exec-model-upload-2 | |
inputDefinitions: | |
artifacts: | |
explanation_metadata_artifact: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
isOptional: true | |
parent_model: | |
artifactType: | |
schemaTitle: google.VertexModel | |
schemaVersion: 0.0.1 | |
isOptional: true | |
unmanaged_container_model: | |
artifactType: | |
schemaTitle: google.UnmanagedContainerModel | |
schemaVersion: 0.0.1 | |
isOptional: true | |
parameters: | |
description: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
display_name: | |
parameterType: STRING | |
encryption_spec_key_name: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
explanation_metadata: | |
defaultValue: {} | |
isOptional: true | |
parameterType: STRUCT | |
explanation_parameters: | |
defaultValue: {} | |
isOptional: true | |
parameterType: STRUCT | |
labels: | |
defaultValue: {} | |
isOptional: true | |
parameterType: STRUCT | |
location: | |
defaultValue: us-central1 | |
isOptional: true | |
parameterType: STRING | |
project: | |
parameterType: STRING | |
outputDefinitions: | |
artifacts: | |
model: | |
artifactType: | |
schemaTitle: google.VertexModel | |
schemaVersion: 0.0.1 | |
parameters: | |
gcp_resources: | |
parameterType: STRING | |
comp-model-upload-3: | |
executorLabel: exec-model-upload-3 | |
inputDefinitions: | |
artifacts: | |
explanation_metadata_artifact: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
isOptional: true | |
parent_model: | |
artifactType: | |
schemaTitle: google.VertexModel | |
schemaVersion: 0.0.1 | |
isOptional: true | |
unmanaged_container_model: | |
artifactType: | |
schemaTitle: google.UnmanagedContainerModel | |
schemaVersion: 0.0.1 | |
isOptional: true | |
parameters: | |
description: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
display_name: | |
parameterType: STRING | |
encryption_spec_key_name: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
explanation_metadata: | |
defaultValue: {} | |
isOptional: true | |
parameterType: STRUCT | |
explanation_parameters: | |
defaultValue: {} | |
isOptional: true | |
parameterType: STRUCT | |
labels: | |
defaultValue: {} | |
isOptional: true | |
parameterType: STRUCT | |
location: | |
defaultValue: us-central1 | |
isOptional: true | |
parameterType: STRING | |
project: | |
parameterType: STRING | |
outputDefinitions: | |
artifacts: | |
model: | |
artifactType: | |
schemaTitle: google.VertexModel | |
schemaVersion: 0.0.1 | |
parameters: | |
gcp_resources: | |
parameterType: STRING | |
comp-read-input-uri: | |
executorLabel: exec-read-input-uri | |
inputDefinitions: | |
artifacts: | |
split_uri: | |
artifactType: | |
schemaTitle: system.Dataset | |
schemaVersion: 0.0.1 | |
description: The path to the file that contains Dataset data. | |
outputDefinitions: | |
parameters: | |
Output: | |
parameterType: LIST | |
comp-read-input-uri-2: | |
executorLabel: exec-read-input-uri-2 | |
inputDefinitions: | |
artifacts: | |
split_uri: | |
artifactType: | |
schemaTitle: system.Dataset | |
schemaVersion: 0.0.1 | |
description: The path to the file that contains Dataset data. | |
outputDefinitions: | |
parameters: | |
Output: | |
parameterType: LIST | |
comp-set-optional-inputs: | |
executorLabel: exec-set-optional-inputs | |
inputDefinitions: | |
artifacts: | |
vertex_dataset: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The Vertex dataset, used when the data source is a Vertex dataset. | |
parameters: | |
data_source_bigquery_table_path: | |
description: The BigQuery table path, used when the data source is BigQuery. | |
parameterType: STRING | |
data_source_csv_filenames: | |
description: The Cloud Storage CSV path, used when the data source is CSV. | |
parameterType: STRING | |
location: | |
description: The GCP region that runs the pipeline components. | |
parameterType: STRING | |
model_display_name: | |
description: The uploaded model's display name. | |
parameterType: STRING | |
project: | |
description: The GCP project that runs the pipeline components. | |
parameterType: STRING | |
outputDefinitions: | |
parameters: | |
data_source_bigquery_table_path: | |
parameterType: STRING | |
data_source_csv_filenames: | |
parameterType: STRING | |
model_display_name: | |
parameterType: STRING | |
comp-string-not-empty: | |
executorLabel: exec-string-not-empty | |
inputDefinitions: | |
parameters: | |
value: | |
description: String value to be checked. | |
parameterType: STRING | |
outputDefinitions: | |
parameters: | |
Output: | |
parameterType: STRING | |
comp-tabular-stats-and-example-gen: | |
executorLabel: exec-tabular-stats-and-example-gen | |
inputDefinitions: | |
parameters: | |
additional_experiments: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
additional_experiments_json: | |
defaultValue: {} | |
isOptional: true | |
parameterType: STRUCT | |
data_source_bigquery_table_path: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
data_source_csv_filenames: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
dataflow_disk_size_gb: | |
defaultValue: 40.0 | |
description: 'The disk size, in gigabytes, to use | |
on each Dataflow worker instance. If not set, defaults to 40.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
dataflow_machine_type: | |
defaultValue: n1-standard-16 | |
description: 'The machine type used for Dataflow | |
jobs. If not set, defaults to n1-standard-16.' | |
isOptional: true | |
parameterType: STRING | |
dataflow_max_num_workers: | |
defaultValue: 25.0 | |
description: 'The maximum number of workers to run the | |
Dataflow job. If not set, defaults to 25.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
dataflow_service_account: | |
defaultValue: '' | |
description: 'Custom service account to run | |
dataflow jobs.' | |
isOptional: true | |
parameterType: STRING | |
dataflow_subnetwork: | |
defaultValue: '' | |
description: 'Dataflow''s fully qualified subnetwork | |
name; when empty, the default subnetwork will be used. More | |
details: | |
https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' | |
isOptional: true | |
parameterType: STRING | |
dataflow_use_public_ips: | |
defaultValue: true | |
description: 'Specifies whether Dataflow | |
workers use public IP addresses.' | |
isOptional: true | |
parameterType: BOOLEAN | |
enable_probabilistic_inference: | |
defaultValue: false | |
isOptional: true | |
parameterType: BOOLEAN | |
encryption_spec_key_name: | |
defaultValue: '' | |
description: Customer-managed encryption key. | |
isOptional: true | |
parameterType: STRING | |
location: | |
description: 'Location for running dataset statistics and example | |
generation.' | |
parameterType: STRING | |
optimization_objective: | |
defaultValue: '' | |
description: "Objective function the model is optimizing\ntowards. The training\ | |
\ process creates a model that maximizes/minimizes\nthe value of the objective\ | |
\ function over the validation set. The\nsupported optimization objectives\ | |
\ depend on the prediction type. If the\nfield is not set, a default objective\ | |
\ function is used.\n classification: \"maximize-au-roc\" (default) -\ | |
\ Maximize the\n area under the receiver operating characteristic (ROC)\ | |
\ curve.\n \"minimize-log-loss\" - Minimize log loss. \"maximize-au-prc\"\ | |
\ -\n Maximize the area under the precision-recall curve.\n \"maximize-precision-at-recall\"\ | |
\ - Maximize precision for a specified\n recall value. \"maximize-recall-at-precision\"\ | |
\ - Maximize recall for a\n specified precision value.\n classification\ | |
\ (multi-class): \"minimize-log-loss\" (default) - Minimize\n log loss.\n\ | |
\ regression: \"minimize-rmse\" (default) - Minimize root-mean-squared\n\ | |
\ error (RMSE). \"minimize-mae\" - Minimize mean-absolute error (MAE).\n\ | |
\ \"minimize-rmsle\" - Minimize root-mean-squared log error (RMSLE)." | |
isOptional: true | |
parameterType: STRING | |
optimization_objective_precision_value: | |
defaultValue: -1.0 | |
description: 'Required when | |
optimization_objective is "maximize-recall-at-precision". Must be | |
between 0 and 1, inclusive.' | |
isOptional: true | |
parameterType: NUMBER_DOUBLE | |
optimization_objective_recall_value: | |
defaultValue: -1.0 | |
description: 'Required when | |
optimization_objective is "maximize-precision-at-recall". Must be | |
between 0 and 1, inclusive.' | |
isOptional: true | |
parameterType: NUMBER_DOUBLE | |
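# The two threshold parameters above only apply to the threshold-based objectives.
# A hypothetical pairing, shown purely for illustration (the numeric values are
# assumptions, not defaults of this pipeline):
#
#   optimization_objective: maximize-recall-at-precision
#   optimization_objective_precision_value: 0.8   # must be between 0 and 1
#
#   optimization_objective: maximize-precision-at-recall
#   optimization_objective_recall_value: 0.9      # must be between 0 and 1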
predefined_split_key: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
prediction_type: | |
description: 'The prediction type. Supported values: | |
"classification", "regression".' | |
parameterType: STRING | |
project: | |
description: 'Project to run dataset statistics and example | |
generation.' | |
parameterType: STRING | |
quantiles: | |
defaultValue: [] | |
isOptional: true | |
parameterType: LIST | |
request_type: | |
defaultValue: COLUMN_STATS_ONLY | |
isOptional: true | |
parameterType: STRING | |
root_dir: | |
description: The Cloud Storage location to store the output. | |
parameterType: STRING | |
run_distillation: | |
defaultValue: false | |
description: 'True if in distillation mode. The default value | |
is false.' | |
isOptional: true | |
parameterType: BOOLEAN | |
stratified_split_key: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
target_column_name: | |
description: The target column name. | |
parameterType: STRING | |
test_fraction: | |
defaultValue: -1.0 | |
isOptional: true | |
parameterType: NUMBER_DOUBLE | |
timestamp_split_key: | |
defaultValue: '' | |
isOptional: true | |
parameterType: STRING | |
training_fraction: | |
defaultValue: -1.0 | |
isOptional: true | |
parameterType: NUMBER_DOUBLE | |
transformations: | |
description: 'Quote-escaped JSON string for transformations. Each | |
transformation applies a transform function to the given input column, and | |
the result is used for training. When creating a transformation for a | |
BigQuery Struct column, the column should be flattened using "." as the | |
delimiter.' | |
parameterType: STRING | |
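# A sketch of what the quote-escaped transformations JSON might look like; the column
# names and transform keys here are assumptions for illustration only, not values used
# by this pipeline:
#
#   transformations: '[{"auto": {"column_name": "age"}},
#                      {"categorical": {"column_name": "city"}},
#                      {"numeric": {"column_name": "income"}}]'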
transformations_path: | |
defaultValue: '' | |
description: 'Path to a GCS file containing a JSON | |
string for transformations.' | |
isOptional: true | |
parameterType: STRING | |
validation_fraction: | |
defaultValue: -1.0 | |
isOptional: true | |
parameterType: NUMBER_DOUBLE | |
weight_column_name: | |
defaultValue: '' | |
description: The weight column name. | |
isOptional: true | |
parameterType: STRING | |
outputDefinitions: | |
artifacts: | |
dataset_schema: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The schema of the dataset. | |
dataset_stats: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The stats of the dataset. | |
eval_split: | |
artifactType: | |
schemaTitle: system.Dataset | |
schemaVersion: 0.0.1 | |
description: The eval split. | |
instance_baseline: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The instance baseline used to calculate explanations. | |
metadata: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The tabular example gen metadata. | |
test_split: | |
artifactType: | |
schemaTitle: system.Dataset | |
schemaVersion: 0.0.1 | |
description: The test split. | |
train_split: | |
artifactType: | |
schemaTitle: system.Dataset | |
schemaVersion: 0.0.1 | |
description: The train split. | |
parameters: | |
downsampled_test_split_json: | |
description: The downsampled test split JSON object. | |
parameterType: LIST | |
gcp_resources: | |
description: 'GCP resources created by this component. For more details, | |
see | |
https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/proto/README.md.' | |
parameterType: STRING | |
test_split_json: | |
description: The test split JSON object. | |
parameterType: LIST | |
comp-write-bp-result-path: | |
executorLabel: exec-write-bp-result-path | |
inputDefinitions: | |
artifacts: | |
bp_job: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The batch prediction job artifact. | |
outputDefinitions: | |
artifacts: | |
result: | |
artifactType: | |
schemaTitle: system.Dataset | |
schemaVersion: 0.0.1 | |
comp-write-bp-result-path-2: | |
executorLabel: exec-write-bp-result-path-2 | |
inputDefinitions: | |
artifacts: | |
bp_job: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The batch prediction job artifact. | |
outputDefinitions: | |
artifacts: | |
result: | |
artifactType: | |
schemaTitle: system.Dataset | |
schemaVersion: 0.0.1 | |
deploymentSpec: | |
executors: | |
exec-automl-tabular-cv-trainer: | |
container: | |
args: | |
- --type | |
- CustomJob | |
- --project | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --payload | |
- '{"Concat": ["{\"display_name\": \"automl-tabular-cv-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", | |
\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", | |
"\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": | |
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", | |
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", | |
\"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", | |
"\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", | |
"\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", | |
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", | |
\"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", | |
"\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", | |
"\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", | |
"\", \"--valid_trials_completed_threshold=0.7\", \"--num_selected_trials=", | |
"{{$.inputs.parameters[''num_selected_trials'']}}", "\", \"--num_selected_features=", | |
"{{$.inputs.parameters[''num_selected_features'']}}", "\", \"--lro_job_info=", | |
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", | |
\"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", | |
\"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_cv_splits=", | |
"{{$.inputs.artifacts[''materialized_cv_splits''].uri}}", "\", \"--tuning_result_input_path=", | |
"{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--tuning_result_output_path=", | |
"{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", | |
"{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", | |
"{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", | |
"{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_custom_job=true\", | |
\"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' | |
command: | |
- python3 | |
- -u | |
- -m | |
- google_cloud_pipeline_components.container.v1.custom_job.launcher | |
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 | |
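# The '{"Concat": [...]}' wrapper in --payload above is KFP's concatenation
# placeholder: the {{$.inputs...}} / {{$.outputs...}} placeholders are substituted at
# runtime and the listed fragments are joined into a single CustomJob JSON string.
# A minimal sketch with a hypothetical value:
#
#   {"Concat": ["{\"display_name\": \"", "{{$.inputs.parameters['project']}}", "\"}"]}
#   # resolves to something like: {"display_name": "my-project"}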
exec-automl-tabular-cv-trainer-2: | |
container: | |
args: | |
- --type | |
- CustomJob | |
- --project | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --payload | |
- '{"Concat": ["{\"display_name\": \"automl-tabular-cv-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", | |
\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", | |
"\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": | |
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", | |
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", | |
\"args\": [\"l2l_cv_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", | |
"\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", | |
"\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", | |
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", | |
\"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", | |
"\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", | |
"\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", | |
"\", \"--valid_trials_completed_threshold=0.7\", \"--num_selected_trials=", | |
"{{$.inputs.parameters[''num_selected_trials'']}}", "\", \"--num_selected_features=", | |
"{{$.inputs.parameters[''num_selected_features'']}}", "\", \"--lro_job_info=", | |
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", | |
\"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", | |
\"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_cv_splits=", | |
"{{$.inputs.artifacts[''materialized_cv_splits''].uri}}", "\", \"--tuning_result_input_path=", | |
"{{$.inputs.artifacts[''tuning_result_input''].uri}}", "\", \"--tuning_result_output_path=", | |
"{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", | |
"{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", | |
"{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", | |
"{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_custom_job=true\", | |
\"--use_json=true\", \"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' | |
command: | |
- python3 | |
- -u | |
- -m | |
- google_cloud_pipeline_components.container.v1.custom_job.launcher | |
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 | |
exec-automl-tabular-ensemble: | |
container: | |
args: | |
- --type | |
- CustomJob | |
- --project | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --payload | |
- '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", | |
\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", | |
"\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": | |
{\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", | |
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", | |
\"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", | |
"\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", | |
\"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", | |
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", | |
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", | |
\"--export_custom_model=", "{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", | |
"\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", | |
\"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", | |
"\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", | |
"\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", | |
"\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", | |
\"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125", | |
"\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", | |
"{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", | |
"{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", | |
"\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", | |
"\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", | |
"\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' | |
command: | |
- python3 | |
- -u | |
- -m | |
- google_cloud_pipeline_components.container.v1.custom_job.launcher | |
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 | |
exec-automl-tabular-ensemble-2: | |
container: | |
args: | |
- --type | |
- CustomJob | |
- --project | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --payload | |
- '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", | |
\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", | |
"\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": | |
{\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", | |
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", | |
\"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", | |
"\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", | |
\"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", | |
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", | |
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", | |
\"--export_custom_model=", "{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", | |
"\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", | |
\"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", | |
"\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", | |
"\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", | |
"\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", | |
\"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125", | |
"\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", | |
"{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", | |
"{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", | |
"\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", | |
"\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", | |
"\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' | |
command: | |
- python3 | |
- -u | |
- -m | |
- google_cloud_pipeline_components.container.v1.custom_job.launcher | |
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 | |
exec-automl-tabular-ensemble-3: | |
container: | |
args: | |
- --type | |
- CustomJob | |
- --project | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --payload | |
- '{"Concat": ["{\"display_name\": \"automl-tabular-ensemble-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", | |
\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", | |
"\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": | |
{\"machine_type\": \"n1-highmem-8\"}, \"container_spec\": {\"image_uri\":\"", | |
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", | |
\"args\": [\"ensemble\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", | |
"\", \"--model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/model\", | |
\"--custom_model_output_path=", "{{$.inputs.parameters[''root_dir'']}}", | |
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/custom_model\", \"--error_file_path=", | |
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", | |
\"--export_custom_model=", "{{$.inputs.parameters[''export_additional_model_without_custom_ops'']}}", | |
"\", \"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", | |
\"--dataset_schema_path=", "{{$.inputs.artifacts[''dataset_schema''].uri}}", | |
"\", \"--tuning_result_input_path=", "{{$.inputs.artifacts[''tuning_result_input''].uri}}", | |
"\", \"--instance_baseline_path=", "{{$.inputs.artifacts[''instance_baseline''].uri}}", | |
"\", \"--warmup_data=", "{{$.inputs.artifacts[''warmup_data''].uri}}", "\", | |
\"--prediction_docker_uri=", "us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125", | |
"\", \"--model_path=", "{{$.outputs.artifacts[''model''].uri}}", "\", \"--custom_model_path=", | |
"{{$.outputs.artifacts[''model_without_custom_ops''].uri}}", "\", \"--explanation_metadata_path=", | |
"{{$.outputs.parameters[''explanation_metadata''].output_file}}", ",", "{{$.outputs.artifacts[''explanation_metadata_artifact''].uri}}", | |
"\", \"--explanation_parameters_path=", "{{$.outputs.parameters[''explanation_parameters''].output_file}}", | |
"\", \"--model_architecture_path=", "{{$.outputs.artifacts[''model_architecture''].uri}}", | |
"\", \"--use_json=true\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' | |
command: | |
- python3 | |
- -u | |
- -m | |
- google_cloud_pipeline_components.container.v1.custom_job.launcher | |
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 | |
exec-automl-tabular-finalizer: | |
container: | |
args: | |
- --type | |
- CustomJob | |
- --project | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --payload | |
- '{"Concat": ["{\"display_name\": \"automl-tabular-finalizer-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", | |
\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", | |
"\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": | |
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", | |
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", | |
\"args\": [\"cancel_l2l_tuner\", \"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", | |
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", \"--cleanup_lro_job_infos=", | |
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\"]}}]}}"]}' | |
command: | |
- python3 | |
- -u | |
- -m | |
- google_cloud_pipeline_components.container.v1.custom_job.launcher | |
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 | |
exec-automl-tabular-infra-validator: | |
container: | |
args: | |
- --executor_input | |
- '{{$}}' | |
image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125 | |
resources: | |
cpuLimit: 8.0 | |
memoryLimit: 52.0 | |
exec-automl-tabular-infra-validator-2: | |
container: | |
args: | |
- --executor_input | |
- '{{$}}' | |
image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125 | |
resources: | |
cpuLimit: 8.0 | |
memoryLimit: 52.0 | |
exec-automl-tabular-infra-validator-3: | |
container: | |
args: | |
- --executor_input | |
- '{{$}}' | |
image: us-docker.pkg.dev/vertex-ai/automl-tabular/prediction-server:20230817_0125 | |
resources: | |
cpuLimit: 8.0 | |
memoryLimit: 52.0 | |
exec-automl-tabular-stage-1-tuner: | |
container: | |
args: | |
- --type | |
- CustomJob | |
- --project | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --payload | |
- '{"Concat": ["{\"display_name\": \"automl-tabular-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", | |
\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", | |
"\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": | |
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", | |
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", | |
\"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", | |
"\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", | |
"\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", | |
"\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", | |
"\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", | |
"\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", | |
"\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", | |
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", | |
\"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", | |
"\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", | |
"\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", | |
"\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", | |
"\", \"--num_selected_features=", "{{$.inputs.parameters[''num_selected_features'']}}", | |
"\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", | |
\"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", | |
\"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_train_split=", | |
"{{$.inputs.artifacts[''materialized_train_split''].uri}}", "\", \"--materialized_eval_split=", | |
"{{$.inputs.artifacts[''materialized_eval_split''].uri}}", "\", \"--is_distill=", | |
"{{$.inputs.parameters[''run_distillation'']}}", "\", \"--tuning_result_output_path=", | |
"{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", | |
"{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", | |
"{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", | |
"{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_json=true\", | |
\"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' | |
command: | |
- python3 | |
- -u | |
- -m | |
- google_cloud_pipeline_components.container.v1.custom_job.launcher | |
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 | |
exec-automl-tabular-stage-1-tuner-2: | |
container: | |
args: | |
- --type | |
- CustomJob | |
- --project | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --payload | |
- '{"Concat": ["{\"display_name\": \"automl-tabular-stage-1-tuner-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", | |
\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", | |
"\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": | |
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", | |
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", | |
\"args\": [\"l2l_stage_1_tuner\", \"--transform_output_path=", "{{$.inputs.artifacts[''transform_output''].uri}}", | |
"\", \"--training_docker_uri=", "us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", | |
"\", \"--feature_selection_result_path=", "{{$.inputs.artifacts[''feature_ranking''].uri}}", | |
"\", \"--disable_early_stopping=", "{{$.inputs.parameters[''disable_early_stopping'']}}", | |
"\", \"--tune_feature_selection_rate=", "{{$.inputs.parameters[''tune_feature_selection_rate'']}}", | |
"\", \"--reduce_search_space_mode=", "{{$.inputs.parameters[''reduce_search_space_mode'']}}", | |
"\", \"--component_id={{$.pipeline_task_uuid}}\", \"--training_base_dir=", | |
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/train\", | |
\"--num_parallel_trial=", "{{$.inputs.parameters[''num_parallel_trials'']}}", | |
"\", \"--single_run_max_secs=", "{{$.inputs.parameters[''single_run_max_secs'']}}", | |
"\", \"--deadline_hours=", "{{$.inputs.parameters[''deadline_hours'']}}", | |
"\", \"--num_selected_trials=", "{{$.inputs.parameters[''num_selected_trials'']}}", | |
"\", \"--num_selected_features=", "{{$.inputs.parameters[''num_selected_features'']}}", | |
"\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", | |
\"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", | |
\"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--materialized_train_split=", | |
"{{$.inputs.artifacts[''materialized_train_split''].uri}}", "\", \"--materialized_eval_split=", | |
"{{$.inputs.artifacts[''materialized_eval_split''].uri}}", "\", \"--is_distill=", | |
"{{$.inputs.parameters[''run_distillation'']}}", "\", \"--tuning_result_output_path=", | |
"{{$.outputs.artifacts[''tuning_result_output''].uri}}", "\", \"--kms_key_name=", | |
"{{$.inputs.parameters[''encryption_spec_key_name'']}}", "\", \"--gcp_resources_path=", | |
"{{$.outputs.parameters[''gcp_resources''].output_file}}", "\", \"--execution_metrics_path=", | |
"{{$.outputs.parameters[''execution_metrics''].output_file}}", "\", \"--use_json=true\", | |
\"--log_level=ERROR\", \"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' | |
command: | |
- python3 | |
- -u | |
- -m | |
- google_cloud_pipeline_components.container.v1.custom_job.launcher | |
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 | |
exec-automl-tabular-transform: | |
container: | |
args: | |
- --type | |
- CustomJob | |
- --project | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --payload | |
- '{"Concat": ["{\"display_name\": \"automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", | |
\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", | |
"\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": | |
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", | |
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", | |
\"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=", | |
"{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=", | |
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", | |
\"--materialized_splits_output_path=", "{{$.inputs.parameters[''root_dir'']}}", | |
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform_materialized\", | |
\"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--dataset_schema_path=", | |
"{{$.inputs.artifacts[''dataset_schema''].uri}}", "\", \"--train_split=", | |
"{{$.inputs.artifacts[''train_split''].uri}}", "\", \"--eval_split=", "{{$.inputs.artifacts[''eval_split''].uri}}", | |
"\", \"--test_split=", "{{$.inputs.artifacts[''test_split''].uri}}", "\", | |
\"--materialized_train_split=", "{{$.outputs.artifacts[''materialized_train_split''].uri}}", | |
"\", \"--materialized_eval_split=", "{{$.outputs.artifacts[''materialized_eval_split''].uri}}", | |
"\", \"--materialized_test_split=", "{{$.outputs.artifacts[''materialized_test_split''].uri}}", | |
"\", \"--training_schema_path=", "{{$.outputs.artifacts[''training_schema_uri''].uri}}", | |
"\", \"--job_name=automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", | |
"\", \"--dataflow_project=", "{{$.inputs.parameters[''project'']}}", "\", | |
\"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", | |
\"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", | |
\"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", | |
\"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", | |
"\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", | |
"\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125", | |
"\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", | |
"\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", | |
"\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", | |
"\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", | |
"\", \"--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}", | |
"\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", | |
\"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", | |
"\"]}}]}}"]}' | |
command: | |
- python3 | |
- -u | |
- -m | |
- google_cloud_pipeline_components.container.v1.custom_job.launcher | |
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 | |
exec-automl-tabular-transform-2: | |
container: | |
args: | |
- --type | |
- CustomJob | |
- --project | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --payload | |
- '{"Concat": ["{\"display_name\": \"automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", | |
\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", | |
"\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": | |
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", | |
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", | |
\"args\": [\"transform\", \"--is_mp=true\", \"--transform_output_artifact_path=", | |
"{{$.outputs.artifacts[''transform_output''].uri}}", "\", \"--transform_output_path=", | |
"{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform\", | |
\"--materialized_splits_output_path=", "{{$.inputs.parameters[''root_dir'']}}", | |
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/transform_materialized\", | |
\"--metadata_path=", "{{$.inputs.artifacts[''metadata''].uri}}", "\", \"--dataset_schema_path=", | |
"{{$.inputs.artifacts[''dataset_schema''].uri}}", "\", \"--train_split=", | |
"{{$.inputs.artifacts[''train_split''].uri}}", "\", \"--eval_split=", "{{$.inputs.artifacts[''eval_split''].uri}}", | |
"\", \"--test_split=", "{{$.inputs.artifacts[''test_split''].uri}}", "\", | |
\"--materialized_train_split=", "{{$.outputs.artifacts[''materialized_train_split''].uri}}", | |
"\", \"--materialized_eval_split=", "{{$.outputs.artifacts[''materialized_eval_split''].uri}}", | |
"\", \"--materialized_test_split=", "{{$.outputs.artifacts[''materialized_test_split''].uri}}", | |
"\", \"--training_schema_path=", "{{$.outputs.artifacts[''training_schema_uri''].uri}}", | |
"\", \"--job_name=automl-tabular-transform-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", | |
"\", \"--dataflow_project=", "{{$.inputs.parameters[''project'']}}", "\", | |
\"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", | |
\"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", | |
\"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", | |
\"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", | |
"\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", | |
"\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125", | |
"\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", | |
"\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", | |
"\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", | |
"\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", | |
"\", \"--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}", | |
"\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", | |
\"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", | |
"\"]}}]}}"]}' | |
command: | |
- python3 | |
- -u | |
- -m | |
- google_cloud_pipeline_components.container.v1.custom_job.launcher | |
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 | |
exec-bool-identity: | |
container: | |
args: | |
- --executor_input | |
- '{{$}}' | |
- --function_to_execute | |
- _bool_identity | |
command: | |
- sh | |
- -c | |
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ | |
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ | |
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ | |
\ && \"$0\" \"$@\"\n" | |
- sh | |
- -ec | |
- 'program_path=$(mktemp -d) | |
printf "%s" "$0" > "$program_path/ephemeral_component.py" | |
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" | |
' | |
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ | |
\ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ | |
\ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ | |
\ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" | |
image: python:3.7 | |
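# _bool_identity returns the strings 'true'/'false' rather than a bool so that the
# result can gate downstream pipeline branches. In KFP DSL the output is typically
# consumed roughly like this (names below are illustrative, not taken from this file):
#
#   run_eval = bool_identity(value=run_evaluation)
#   with dsl.Condition(run_eval.output == 'true'):
#       ...  # evaluation tasks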
exec-bool-identity-2: | |
container: | |
args: | |
- --executor_input | |
- '{{$}}' | |
- --function_to_execute | |
- _bool_identity | |
command: | |
- sh | |
- -c | |
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ | |
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ | |
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ | |
\ && \"$0\" \"$@\"\n" | |
- sh | |
- -ec | |
- 'program_path=$(mktemp -d) | |
printf "%s" "$0" > "$program_path/ephemeral_component.py" | |
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" | |
' | |
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ | |
\ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ | |
\ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ | |
\ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" | |
image: python:3.7 | |
exec-bool-identity-3: | |
container: | |
args: | |
- --executor_input | |
- '{{$}}' | |
- --function_to_execute | |
- _bool_identity | |
command: | |
- sh | |
- -c | |
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ | |
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ | |
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ | |
\ && \"$0\" \"$@\"\n" | |
- sh | |
- -ec | |
- 'program_path=$(mktemp -d) | |
printf "%s" "$0" > "$program_path/ephemeral_component.py" | |
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" | |
' | |
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ | |
\ *\n\ndef _bool_identity(value: bool) -> str:\n \"\"\"Returns boolean\ | |
\ value.\n\n Args:\n value: Boolean value to return\n\n Returns:\n\ | |
\ Boolean value.\n \"\"\"\n return 'true' if value else 'false'\n\n" | |
image: python:3.7 | |
exec-calculate-training-parameters: | |
container: | |
args: | |
- --executor_input | |
- '{{$}}' | |
- --function_to_execute | |
- _calculate_training_parameters | |
command: | |
- sh | |
- -c | |
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ | |
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ | |
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ | |
\ && \"$0\" \"$@\"\n" | |
- sh | |
- -ec | |
- 'program_path=$(mktemp -d) | |
printf "%s" "$0" > "$program_path/ephemeral_component.py" | |
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" | |
' | |
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ | |
\ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ | |
\ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ | |
\ int,\n run_distillation: bool,\n is_skip_architecture_search: bool\ | |
\ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ | |
\ [\n ('stage_1_deadline_hours', float),\n ('stage_1_num_selected_trials',\ | |
\ int),\n ('stage_1_single_run_max_secs', int),\n ('stage_2_deadline_hours',\ | |
\ float),\n ('stage_2_single_run_max_secs', int),\n ('distill_stage_1_deadline_hours',\ | |
\ float),\n ('reduce_search_space_mode', str),\n ],\n):\n \"\"\ | |
\"Calculates training parameters.\n\n Args:\n stage_1_num_parallel_trials:\ | |
\ Number of parallel trials for stage 1.\n train_budget_milli_node_hours:\ | |
\ The train budget of creating this model,\n expressed in milli node\ | |
\ hours i.e. 1,000 value in this field means 1 node\n hour.\n stage_2_num_parallel_trials:\ | |
\ Number of parallel trials for stage 2.\n run_distillation: Whether\ | |
\ to run distill in the training pipeline.\n is_skip_architecture_search:\ | |
\ If component is being called in the\n skip_architecture_search pipeline.\n\ | |
\ fast_testing: Internal flag used for presubmit tests.\n\n Returns:\n\ | |
\ stage_1_deadline_hours: Maximum number of hours to run stage 1.\n\ | |
\ stage_1_num_selected_trials: Number of selected trials for stage\ | |
\ 1.\n stage_1_single_run_max_secs: Maximum number of seconds for a\ | |
\ single stage\n 1\n training trial.\n stage_2_deadline_hours:\ | |
\ Maximum number of hours to run stage 2.\n stage_2_single_run_max_secs:\ | |
\ Maximum number of seconds for a single stage\n 2\n training\ | |
\ trial.\n distill_stage_1_deadline_hours: Maximum number of hours\ | |
\ to run stage 1 for\n the model distillation.\n reduce_search_space_mode:\ | |
\ The reduce search space mode. Possible values:\n minimal, regular,\ | |
\ full.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ | |
\ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ | |
\ num_folds = 5\n distill_total_trials = 100\n\n stage_1_deadline_hours\ | |
\ = -1.0\n stage_1_num_selected_trials = -1\n stage_1_single_run_max_secs\ | |
\ = -1\n stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs =\ | |
\ -1\n distill_stage_1_deadline_hours = 1.0\n reduce_search_space_mode\ | |
\ = 'regular'\n\n if is_skip_architecture_search:\n stage_2_deadline_hours\ | |
\ = train_budget_milli_node_hours / 1000.0\n stage_2_single_run_max_secs\ | |
\ = int(stage_2_deadline_hours * 3600.0 / 1.3)\n else:\n hours = float(train_budget_milli_node_hours)\ | |
\ / 1000.0\n multiplier = stage_1_num_parallel_trials * hours / 500.0\n\ | |
\ stage_1_single_run_max_secs = int(math.sqrt(multiplier) * 2400.0)\n\ | |
\ phase_2_rounds = int(\n math.sqrt(multiplier) * 100 / stage_2_num_parallel_trials\ | |
\ + 0.5\n )\n if phase_2_rounds < 1:\n phase_2_rounds = 1\n\n\ | |
\ # All of magic number \"1.3\" above is because the trial doesn't\n\ | |
\ # always finish in time_per_trial. 1.3 is an empirical safety margin\ | |
\ here.\n stage_1_deadline_secs = int(\n hours * 3600.0 - 1.3\ | |
\ * stage_1_single_run_max_secs * phase_2_rounds\n )\n\n if stage_1_deadline_secs\ | |
\ < hours * 3600.0 * 0.5:\n stage_1_deadline_secs = int(hours * 3600.0\ | |
\ * 0.5)\n # Phase 1 deadline is the same as phase 2 deadline in this\ | |
\ case. Phase 2\n # can't finish in time after the deadline is cut,\ | |
\ so adjust the time per\n # trial to meet the deadline.\n stage_1_single_run_max_secs\ | |
\ = int(\n stage_1_deadline_secs / (1.3 * phase_2_rounds)\n \ | |
\ )\n\n reduce_search_space_mode = 'minimal'\n if multiplier > 2:\n\ | |
\ reduce_search_space_mode = 'regular'\n if multiplier > 4:\n \ | |
\ reduce_search_space_mode = 'full'\n\n # Stage 2 number of trials\ | |
\ is stage_1_num_selected_trials *\n # num_folds, which should be equal\ | |
\ to phase_2_rounds *\n # stage_2_num_parallel_trials. Use this information\ | |
\ to calculate\n # stage_1_num_selected_trials:\n stage_1_num_selected_trials\ | |
\ = int(\n phase_2_rounds * stage_2_num_parallel_trials / num_folds\n\ | |
\ )\n stage_1_deadline_hours = stage_1_deadline_secs / 3600.0\n\n\ | |
\ stage_2_deadline_hours = hours - stage_1_deadline_hours\n stage_2_single_run_max_secs\ | |
\ = stage_1_single_run_max_secs\n\n if run_distillation:\n # All\ | |
\ of magic number \"1.3\" above is because the trial doesn't always\n \ | |
\ # finish in time_per_trial. 1.3 is an empirical safety margin here.\n\ | |
\ distill_stage_1_deadline_hours = (\n math.ceil(float(distill_total_trials)\ | |
\ / stage_1_num_parallel_trials)\n * stage_1_single_run_max_secs\n\ | |
\ * 1.3\n / 3600.0\n )\n\n if fast_testing:\n \ | |
\ distill_stage_1_deadline_hours = 0.2\n stage_1_deadline_hours = 0.2\n\ | |
\ stage_1_single_run_max_secs = 1\n stage_2_deadline_hours = 0.2\n\ | |
\ stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ | |
\ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ | |
\ 'stage_1_num_selected_trials',\n 'stage_1_single_run_max_secs',\n\ | |
\ 'stage_2_deadline_hours',\n 'stage_2_single_run_max_secs',\n\ | |
\ 'distill_stage_1_deadline_hours',\n 'reduce_search_space_mode',\n\ | |
\ ],\n )(\n stage_1_deadline_hours,\n stage_1_num_selected_trials,\n\ | |
\ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ | |
\ stage_2_single_run_max_secs,\n distill_stage_1_deadline_hours,\n\ | |
\ reduce_search_space_mode,\n )\n\n" | |
image: python:3.7 | |
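# A rough numeric trace of _calculate_training_parameters, assuming a budget of
# train_budget_milli_node_hours=1000 and 35 parallel trials for both stages (no
# distillation, architecture search not skipped; values approximate):
#
#   hours      = 1.0
#   multiplier = 35 * 1.0 / 500.0 = 0.07
#   stage_1_single_run_max_secs = int(sqrt(0.07) * 2400)            ~= 634
#   phase_2_rounds              = int(sqrt(0.07) * 100 / 35 + 0.5)  =  1
#   stage_1_deadline_secs       = int(3600 - 1.3 * 634)             ~= 2775  (~0.77 h)
#   stage_1_num_selected_trials = int(1 * 35 / 5)                   =  7
#   stage_2_deadline_hours      = 1.0 - 0.77                        ~= 0.23
#   reduce_search_space_mode    = 'minimal'  (since multiplier <= 2)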
exec-calculate-training-parameters-2: | |
container: | |
args: | |
- --executor_input | |
- '{{$}}' | |
- --function_to_execute | |
- _calculate_training_parameters | |
command: | |
- sh | |
- -c | |
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ | |
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ | |
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ | |
\ && \"$0\" \"$@\"\n" | |
- sh | |
- -ec | |
- 'program_path=$(mktemp -d) | |
printf "%s" "$0" > "$program_path/ephemeral_component.py" | |
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" | |
' | |
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ | |
\ *\n\ndef _calculate_training_parameters(\n stage_1_num_parallel_trials:\ | |
\ int,\n train_budget_milli_node_hours: float,\n stage_2_num_parallel_trials:\ | |
\ int,\n run_distillation: bool,\n is_skip_architecture_search: bool\ | |
\ = False,\n fast_testing: bool = False,\n) -> NamedTuple(\n 'Outputs',\n\ | |
\ [\n ('stage_1_deadline_hours', float),\n ('stage_1_num_selected_trials',\ | |
\ int),\n ('stage_1_single_run_max_secs', int),\n ('stage_2_deadline_hours',\ | |
\ float),\n ('stage_2_single_run_max_secs', int),\n ('distill_stage_1_deadline_hours',\ | |
\ float),\n ('reduce_search_space_mode', str),\n ],\n):\n \"\"\ | |
\"Calculates training parameters.\n\n Args:\n stage_1_num_parallel_trials:\ | |
\ Number of parallel trials for stage 1.\n train_budget_milli_node_hours:\ | |
\ The train budget of creating this model,\n expressed in milli node\ | |
\ hours i.e. 1,000 value in this field means 1 node\n hour.\n stage_2_num_parallel_trials:\ | |
\ Number of parallel trials for stage 2.\n run_distillation: Whether\ | |
\ to run distill in the training pipeline.\n is_skip_architecture_search:\ | |
\ If component is being called in the\n skip_architecture_search pipeline.\n\ | |
\ fast_testing: Internal flag used for presubmit tests.\n\n Returns:\n\ | |
\ stage_1_deadline_hours: Maximum number of hours to run stage 1.\n\ | |
\ stage_1_num_selected_trials: Number of selected trials for stage\ | |
\ 1.\n stage_1_single_run_max_secs: Maximum number of seconds for a\ | |
\ single stage\n 1\n training trial.\n stage_2_deadline_hours:\ | |
\ Maximum number of hours to run stage 2.\n stage_2_single_run_max_secs:\ | |
\ Maximum number of seconds for a single stage\n      2\n      training\ | |
\ trial.\n distill_stage_1_deadline_hours: Maximum number of hours\ | |
\ to run stage 1 for\n the model distillation.\n reduce_search_space_mode:\ | |
\ The reduce search space mode. Possible values:\n minimal, regular,\ | |
\ full.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ | |
\ import collections\n import math\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ | |
\ num_folds = 5\n distill_total_trials = 100\n\n stage_1_deadline_hours\ | |
\ = -1.0\n stage_1_num_selected_trials = -1\n stage_1_single_run_max_secs\ | |
\ = -1\n stage_2_deadline_hours = -1.0\n stage_2_single_run_max_secs =\ | |
\ -1\n distill_stage_1_deadline_hours = 1.0\n reduce_search_space_mode\ | |
\ = 'regular'\n\n if is_skip_architecture_search:\n stage_2_deadline_hours\ | |
\ = train_budget_milli_node_hours / 1000.0\n stage_2_single_run_max_secs\ | |
\ = int(stage_2_deadline_hours * 3600.0 / 1.3)\n else:\n hours = float(train_budget_milli_node_hours)\ | |
\ / 1000.0\n multiplier = stage_1_num_parallel_trials * hours / 500.0\n\ | |
\ stage_1_single_run_max_secs = int(math.sqrt(multiplier) * 2400.0)\n\ | |
\ phase_2_rounds = int(\n math.sqrt(multiplier) * 100 / stage_2_num_parallel_trials\ | |
\ + 0.5\n )\n if phase_2_rounds < 1:\n phase_2_rounds = 1\n\n\ | |
\ # The magic number \"1.3\" is used because the trial doesn't\n\ | |
\ # always finish in time_per_trial. 1.3 is an empirical safety margin\ | |
\ here.\n stage_1_deadline_secs = int(\n hours * 3600.0 - 1.3\ | |
\ * stage_1_single_run_max_secs * phase_2_rounds\n )\n\n if stage_1_deadline_secs\ | |
\ < hours * 3600.0 * 0.5:\n stage_1_deadline_secs = int(hours * 3600.0\ | |
\ * 0.5)\n # Phase 1 deadline is the same as phase 2 deadline in this\ | |
\ case. Phase 2\n # can't finish in time after the deadline is cut,\ | |
\ so adjust the time per\n # trial to meet the deadline.\n stage_1_single_run_max_secs\ | |
\ = int(\n stage_1_deadline_secs / (1.3 * phase_2_rounds)\n \ | |
\ )\n\n reduce_search_space_mode = 'minimal'\n if multiplier > 2:\n\ | |
\ reduce_search_space_mode = 'regular'\n if multiplier > 4:\n \ | |
\ reduce_search_space_mode = 'full'\n\n # Stage 2 number of trials\ | |
\ is stage_1_num_selected_trials *\n # num_folds, which should be equal\ | |
\ to phase_2_rounds *\n # stage_2_num_parallel_trials. Use this information\ | |
\ to calculate\n # stage_1_num_selected_trials:\n stage_1_num_selected_trials\ | |
\ = int(\n phase_2_rounds * stage_2_num_parallel_trials / num_folds\n\ | |
\ )\n stage_1_deadline_hours = stage_1_deadline_secs / 3600.0\n\n\ | |
\ stage_2_deadline_hours = hours - stage_1_deadline_hours\n stage_2_single_run_max_secs\ | |
\ = stage_1_single_run_max_secs\n\n  if run_distillation:\n    # The\ | |
\ magic number \"1.3\" is used because the trial doesn't always\n \ | |
\ # finish in time_per_trial. 1.3 is an empirical safety margin here.\n\ | |
\ distill_stage_1_deadline_hours = (\n math.ceil(float(distill_total_trials)\ | |
\ / stage_1_num_parallel_trials)\n * stage_1_single_run_max_secs\n\ | |
\ * 1.3\n / 3600.0\n )\n\n if fast_testing:\n \ | |
\ distill_stage_1_deadline_hours = 0.2\n stage_1_deadline_hours = 0.2\n\ | |
\ stage_1_single_run_max_secs = 1\n stage_2_deadline_hours = 0.2\n\ | |
\ stage_2_single_run_max_secs = 1\n\n return collections.namedtuple(\n\ | |
\ 'Outputs',\n [\n 'stage_1_deadline_hours',\n \ | |
\ 'stage_1_num_selected_trials',\n 'stage_1_single_run_max_secs',\n\ | |
\ 'stage_2_deadline_hours',\n 'stage_2_single_run_max_secs',\n\ | |
\ 'distill_stage_1_deadline_hours',\n 'reduce_search_space_mode',\n\ | |
\ ],\n )(\n stage_1_deadline_hours,\n stage_1_num_selected_trials,\n\ | |
\ stage_1_single_run_max_secs,\n stage_2_deadline_hours,\n \ | |
\ stage_2_single_run_max_secs,\n distill_stage_1_deadline_hours,\n\ | |
\ reduce_search_space_mode,\n )\n\n" | |
image: python:3.7 | |
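# Illustration only (hypothetical inputs, not part of the compiled spec): the
# _calculate_training_parameters components above derive the stage deadlines from
# the training budget. For example, assuming train_budget_milli_node_hours=40000,
# stage_1_num_parallel_trials=50, stage_2_num_parallel_trials=35, and
# run_distillation=False:
#   hours = 40000 / 1000.0 = 40.0
#   multiplier = 50 * 40.0 / 500.0 = 4.0
#   stage_1_single_run_max_secs = int(sqrt(4.0) * 2400.0) = 4800
#   phase_2_rounds = int(sqrt(4.0) * 100 / 35 + 0.5) = 6
#   stage_1_deadline_secs = int(40 * 3600.0 - 1.3 * 4800 * 6) = 106560
#   reduce_search_space_mode = 'regular'  (because 2 < multiplier <= 4)
#   stage_1_num_selected_trials = int(6 * 35 / 5) = 42
#   stage_1_deadline_hours = 106560 / 3600.0 = 29.6
#   stage_2_deadline_hours = 40.0 - 29.6 = 10.4
#   stage_2_single_run_max_secs = stage_1_single_run_max_secs = 4800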
exec-feature-attribution: | |
container: | |
args: | |
- --task | |
- explanation | |
- --setup_file | |
- /setup.py | |
- --project_id | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --root_dir | |
- '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' | |
- --batch_prediction_format | |
- '{{$.inputs.parameters[''predictions_format'']}}' | |
- '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", | |
"{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' | |
- '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", | |
{"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", | |
".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", | |
".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' | |
- --dataflow_job_prefix | |
- evaluation-feature-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} | |
- --dataflow_service_account | |
- '{{$.inputs.parameters[''dataflow_service_account'']}}' | |
- --dataflow_disk_size | |
- '{{$.inputs.parameters[''dataflow_disk_size'']}}' | |
- --dataflow_machine_type | |
- '{{$.inputs.parameters[''dataflow_machine_type'']}}' | |
- --dataflow_workers_num | |
- '{{$.inputs.parameters[''dataflow_workers_num'']}}' | |
- --dataflow_max_workers_num | |
- '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' | |
- --dataflow_subnetwork | |
- '{{$.inputs.parameters[''dataflow_subnetwork'']}}' | |
- --dataflow_use_public_ips | |
- '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' | |
- --kms_key_name | |
- '{{$.inputs.parameters[''encryption_spec_key_name'']}}' | |
- --force_direct_runner | |
- '{{$.inputs.parameters[''force_direct_runner'']}}' | |
- --gcs_output_path | |
- '{{$.outputs.artifacts[''feature_attributions''].path}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --executor_input | |
- '{{$}}' | |
command: | |
- python3 | |
- /main.py | |
image: gcr.io/ml-pipeline/model-evaluation:v0.9 | |
exec-feature-attribution-2: | |
container: | |
args: | |
- --task | |
- explanation | |
- --setup_file | |
- /setup.py | |
- --project_id | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --root_dir | |
- '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' | |
- --batch_prediction_format | |
- '{{$.inputs.parameters[''predictions_format'']}}' | |
- '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", | |
"{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' | |
- '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", | |
{"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", | |
".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", | |
".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' | |
- --dataflow_job_prefix | |
- evaluation-feature-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} | |
- --dataflow_service_account | |
- '{{$.inputs.parameters[''dataflow_service_account'']}}' | |
- --dataflow_disk_size | |
- '{{$.inputs.parameters[''dataflow_disk_size'']}}' | |
- --dataflow_machine_type | |
- '{{$.inputs.parameters[''dataflow_machine_type'']}}' | |
- --dataflow_workers_num | |
- '{{$.inputs.parameters[''dataflow_workers_num'']}}' | |
- --dataflow_max_workers_num | |
- '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' | |
- --dataflow_subnetwork | |
- '{{$.inputs.parameters[''dataflow_subnetwork'']}}' | |
- --dataflow_use_public_ips | |
- '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' | |
- --kms_key_name | |
- '{{$.inputs.parameters[''encryption_spec_key_name'']}}' | |
- --force_direct_runner | |
- '{{$.inputs.parameters[''force_direct_runner'']}}' | |
- --gcs_output_path | |
- '{{$.outputs.artifacts[''feature_attributions''].path}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --executor_input | |
- '{{$}}' | |
command: | |
- python3 | |
- /main.py | |
image: gcr.io/ml-pipeline/model-evaluation:v0.9 | |
exec-feature-attribution-3: | |
container: | |
args: | |
- --task | |
- explanation | |
- --setup_file | |
- /setup.py | |
- --project_id | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --root_dir | |
- '{{$.pipeline_root}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' | |
- --batch_prediction_format | |
- '{{$.inputs.parameters[''predictions_format'']}}' | |
- '{"IfPresent": {"InputName": "predictions_gcs_source", "Then": ["--batch_prediction_gcs_source", | |
"{{$.inputs.artifacts[''predictions_gcs_source''].uri}}"]}}' | |
- '{"IfPresent": {"InputName": "predictions_bigquery_source", "Then": ["--batch_prediction_bigquery_source", | |
{"Concat": ["bq://", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''projectId'']}}", | |
".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''datasetId'']}}", | |
".", "{{$.inputs.artifacts[''predictions_bigquery_source''].metadata[''tableId'']}}"]}]}}' | |
- --dataflow_job_prefix | |
- evaluation-feature-attribution-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} | |
- --dataflow_service_account | |
- '{{$.inputs.parameters[''dataflow_service_account'']}}' | |
- --dataflow_disk_size | |
- '{{$.inputs.parameters[''dataflow_disk_size'']}}' | |
- --dataflow_machine_type | |
- '{{$.inputs.parameters[''dataflow_machine_type'']}}' | |
- --dataflow_workers_num | |
- '{{$.inputs.parameters[''dataflow_workers_num'']}}' | |
- --dataflow_max_workers_num | |
- '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' | |
- --dataflow_subnetwork | |
- '{{$.inputs.parameters[''dataflow_subnetwork'']}}' | |
- --dataflow_use_public_ips | |
- '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' | |
- --kms_key_name | |
- '{{$.inputs.parameters[''encryption_spec_key_name'']}}' | |
- --force_direct_runner | |
- '{{$.inputs.parameters[''force_direct_runner'']}}' | |
- --gcs_output_path | |
- '{{$.outputs.artifacts[''feature_attributions''].path}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --executor_input | |
- '{{$}}' | |
command: | |
- python3 | |
- /main.py | |
image: gcr.io/ml-pipeline/model-evaluation:v0.9 | |
exec-importer: | |
importer: | |
artifactUri: | |
runtimeParameter: uri | |
typeSchema: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
exec-merge-materialized-splits: | |
container: | |
args: | |
- --executor_input | |
- '{{$}}' | |
- --function_to_execute | |
- _merge_materialized_splits | |
command: | |
- sh | |
- -c | |
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ | |
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ | |
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ | |
\ && \"$0\" \"$@\"\n" | |
- sh | |
- -ec | |
- 'program_path=$(mktemp -d) | |
printf "%s" "$0" > "$program_path/ephemeral_component.py" | |
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" | |
' | |
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ | |
\ *\n\ndef _merge_materialized_splits(\n split_0: dsl.InputPath('MaterializedSplit'),\n\ | |
\ split_1: dsl.InputPath('MaterializedSplit'),\n splits: dsl.OutputPath('MaterializedSplit'),\n\ | |
):\n \"\"\"Merge two materialized splits.\n\n Args:\n split_0: The\ | |
\ first materialized split.\n split_1: The second materialized split.\n\ | |
\ splits: The merged materialized split.\n \"\"\"\n with open(split_0,\ | |
\ 'r') as f:\n split_0_content = f.read()\n with open(split_1, 'r')\ | |
\ as f:\n split_1_content = f.read()\n with open(splits, 'w') as f:\n\ | |
\ f.write(','.join([split_0_content, split_1_content]))\n\n" | |
image: python:3.7 | |
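# Illustration only (hypothetical paths): _merge_materialized_splits above simply
# concatenates the contents of its two input files with a comma, e.g.
#   split_0 file contents: gs://example-bucket/materialized/train-split
#   split_1 file contents: gs://example-bucket/materialized/eval-split
#   merged output file:    gs://example-bucket/materialized/train-split,gs://example-bucket/materialized/eval-split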
exec-model-batch-explanation: | |
container: | |
args: | |
- --type | |
- BatchPredictionJob | |
- --payload | |
- '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", | |
"\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", | |
"\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", | |
"}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", | |
"\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", | |
", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", | |
"\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", | |
"\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", | |
"\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": | |
\"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": | |
\"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": | |
", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": | |
", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": | |
", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": | |
{", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", | |
"}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", | |
", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", | |
", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", | |
"}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", | |
"\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": | |
{\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", | |
"\"}", "}"]}' | |
- --project | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --executor_input | |
- '{{$}}' | |
command: | |
- python3 | |
- -u | |
- -m | |
- launcher | |
image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 | |
exec-model-batch-explanation-2: | |
container: | |
args: | |
- --type | |
- BatchPredictionJob | |
- --payload | |
- '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", | |
"\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", | |
"\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", | |
"}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", | |
"\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", | |
", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", | |
"\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", | |
"\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", | |
"\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": | |
\"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": | |
\"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": | |
", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": | |
", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": | |
", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": | |
{", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", | |
"}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", | |
", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", | |
", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", | |
"}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", | |
"\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": | |
{\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", | |
"\"}", "}"]}' | |
- --project | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --executor_input | |
- '{{$}}' | |
command: | |
- python3 | |
- -u | |
- -m | |
- launcher | |
image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 | |
exec-model-batch-explanation-3: | |
container: | |
args: | |
- --type | |
- BatchPredictionJob | |
- --payload | |
- '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", | |
"\", ", " \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", | |
"\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", | |
"}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", | |
"\"", "}", "}", ", \"model_parameters\": ", "{{$.inputs.parameters[''model_parameters'']}}", | |
", \"output_config\": {", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", | |
"\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", | |
"\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", | |
"\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": | |
\"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": | |
\"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": | |
", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": | |
", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": | |
", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": | |
{", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", | |
"}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", | |
", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", | |
", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", | |
"}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", | |
"\"", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": | |
{\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", | |
"\"}", "}"]}' | |
- --project | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --executor_input | |
- '{{$}}' | |
command: | |
- python3 | |
- -u | |
- -m | |
- launcher | |
image: gcr.io/ml-pipeline/automl-tables-private:1.0.13 | |
exec-model-batch-predict: | |
container: | |
args: | |
- --type | |
- BatchPredictionJob | |
- --payload | |
- '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", | |
"\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": | |
\"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, | |
" \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", | |
"\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", | |
"}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", | |
"\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", | |
"\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" | |
", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", | |
\"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, | |
{"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": | |
", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": | |
", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": | |
{", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", | |
"\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", | |
"\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", | |
"\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": | |
\"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": | |
\"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": | |
", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": | |
", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": | |
", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": | |
{", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", | |
"}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", | |
", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", | |
", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", | |
"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": | |
{\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", | |
"\"}", "}"]}' | |
- --project | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --executor_input | |
- '{{$}}' | |
command: | |
- python3 | |
- -u | |
- -m | |
- google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher | |
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 | |
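# Illustration only (abridged, hypothetical values): at runtime the launcher
# resolves the "Concat"/"IfPresent" placeholders in the --payload argument above
# into a BatchPredictionJob request body shaped roughly like:
#   {
#     "display_name": "batch-predict-example",
#     "model": "projects/example-project/locations/us-central1/models/123",
#     "input_config": {"instances_format": "tf-record",
#                      "gcs_source": {"uris": ["gs://example-bucket/input-*"]}},
#     "output_config": {"predictions_format": "tf-record",
#                       "gcs_destination": {"output_uri_prefix": "gs://example-bucket/out"}},
#     "dedicated_resources": {"machine_spec": {"machine_type": "n1-standard-16"},
#                             "starting_replica_count": 25, "max_replica_count": 25}
#   }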
exec-model-batch-predict-2: | |
container: | |
args: | |
- --type | |
- BatchPredictionJob | |
- --payload | |
- '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", | |
"\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": | |
\"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, | |
" \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", | |
"\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", | |
"}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", | |
"\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", | |
"\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" | |
", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", | |
\"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, | |
{"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": | |
", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": | |
", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": | |
{", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", | |
"\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", | |
"\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", | |
"\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": | |
\"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": | |
\"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": | |
", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": | |
", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": | |
", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": | |
{", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", | |
"}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", | |
", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", | |
", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", | |
"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": | |
{\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", | |
"\"}", "}"]}' | |
- --project | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --executor_input | |
- '{{$}}' | |
command: | |
- python3 | |
- -u | |
- -m | |
- google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher | |
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 | |
exec-model-batch-predict-3: | |
container: | |
args: | |
- --type | |
- BatchPredictionJob | |
- --payload | |
- '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", | |
"\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": | |
\"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, | |
" \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", | |
"\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", | |
"}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", | |
"\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", | |
"\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" | |
", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", | |
\"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, | |
{"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": | |
", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": | |
", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": | |
{", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", | |
"\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", | |
"\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", | |
"\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": | |
\"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": | |
\"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": | |
", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": | |
", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": | |
", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": | |
{", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", | |
"}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", | |
", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", | |
", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", | |
"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": | |
{\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", | |
"\"}", "}"]}' | |
- --project | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --executor_input | |
- '{{$}}' | |
command: | |
- python3 | |
- -u | |
- -m | |
- google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher | |
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 | |
exec-model-batch-predict-4: | |
container: | |
args: | |
- --type | |
- BatchPredictionJob | |
- --payload | |
- '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", | |
"\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": | |
\"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, | |
" \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", | |
"\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", | |
"}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", | |
"\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", | |
"\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" | |
", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", | |
\"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, | |
{"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": | |
", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": | |
", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": | |
{", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", | |
"\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", | |
"\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", | |
"\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": | |
\"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": | |
\"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": | |
", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": | |
", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": | |
", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": | |
{", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", | |
"}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", | |
", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", | |
", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", | |
"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": | |
{\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", | |
"\"}", "}"]}' | |
- --project | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --executor_input | |
- '{{$}}' | |
command: | |
- python3 | |
- -u | |
- -m | |
- google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher | |
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 | |
exec-model-batch-predict-5: | |
container: | |
args: | |
- --type | |
- BatchPredictionJob | |
- --payload | |
- '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''job_display_name'']}}", | |
"\", ", {"IfPresent": {"InputName": "model", "Then": {"Concat": ["\"model\": | |
\"", "{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}", "\","]}}}, | |
" \"input_config\": {", "\"instances_format\": \"", "{{$.inputs.parameters[''instances_format'']}}", | |
"\"", ", \"gcs_source\": {", "\"uris\":", "{{$.inputs.parameters[''gcs_source_uris'']}}", | |
"}", ", \"bigquery_source\": {", "\"input_uri\": \"", "{{$.inputs.parameters[''bigquery_source_input_uri'']}}", | |
"\"", "}", "}", ", \"instance_config\": {", "\"instance_type\": \"", "{{$.inputs.parameters[''instance_type'']}}", | |
"\"", ", \"key_field\": \"", "{{$.inputs.parameters[''key_field'']}}", "\" | |
", {"IfPresent": {"InputName": "included_fields", "Then": {"Concat": [", | |
\"included_fields\": ", "{{$.inputs.parameters[''included_fields'']}}"]}}}, | |
{"IfPresent": {"InputName": "excluded_fields", "Then": {"Concat": [", \"excluded_fields\": | |
", "{{$.inputs.parameters[''excluded_fields'']}}"]}}}, "}", ", \"model_parameters\": | |
", "{{$.inputs.parameters[''model_parameters'']}}", ", \"output_config\": | |
{", "\"predictions_format\": \"", "{{$.inputs.parameters[''predictions_format'']}}", | |
"\"", ", \"gcs_destination\": {", "\"output_uri_prefix\": \"", "{{$.inputs.parameters[''gcs_destination_output_uri_prefix'']}}", | |
"\"", "}", ", \"bigquery_destination\": {", "\"output_uri\": \"", "{{$.inputs.parameters[''bigquery_destination_output_uri'']}}", | |
"\"", "}", "}", ", \"dedicated_resources\": {", "\"machine_spec\": {", "\"machine_type\": | |
\"", "{{$.inputs.parameters[''machine_type'']}}", "\"", ", \"accelerator_type\": | |
\"", "{{$.inputs.parameters[''accelerator_type'']}}", "\"", ", \"accelerator_count\": | |
", "{{$.inputs.parameters[''accelerator_count'']}}", "}", ", \"starting_replica_count\": | |
", "{{$.inputs.parameters[''starting_replica_count'']}}", ", \"max_replica_count\": | |
", "{{$.inputs.parameters[''max_replica_count'']}}", "}", ", \"manual_batch_tuning_parameters\": | |
{", "\"batch_size\": ", "{{$.inputs.parameters[''manual_batch_tuning_parameters_batch_size'']}}", | |
"}", ", \"generate_explanation\": ", "{{$.inputs.parameters[''generate_explanation'']}}", | |
", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", | |
", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", | |
"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", ", \"encryption_spec\": | |
{\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", | |
"\"}", "}"]}' | |
- --project | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --executor_input | |
- '{{$}}' | |
command: | |
- python3 | |
- -u | |
- -m | |
- google_cloud_pipeline_components.container.v1.batch_prediction_job.launcher | |
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b4 | |
exec-model-evaluation: | |
container: | |
args: | |
- --setup_file | |
- /setup.py | |
- --json_mode | |
- 'true' | |
- --project_id | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --problem_type | |
- '{{$.inputs.parameters[''problem_type'']}}' | |
- --batch_prediction_format | |
- '{{$.inputs.parameters[''predictions_format'']}}' | |
- --batch_prediction_gcs_source | |
- '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' | |
- --ground_truth_format | |
- '{{$.inputs.parameters[''ground_truth_format'']}}' | |
- --key_prefix_in_prediction_dataset | |
- instance | |
- --root_dir | |
- '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' | |
- --classification_type | |
- multiclass | |
- --ground_truth_column | |
- instance.{{$.inputs.parameters['ground_truth_column']}} | |
- --prediction_score_column | |
- '{{$.inputs.parameters[''prediction_score_column'']}}' | |
- --prediction_label_column | |
- '{{$.inputs.parameters[''prediction_label_column'']}}' | |
- --prediction_id_column | |
- '' | |
- --example_weight_column | |
- '' | |
- --generate_feature_attribution | |
- 'false' | |
- --dataflow_job_prefix | |
- evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} | |
- --dataflow_service_account | |
- '{{$.inputs.parameters[''dataflow_service_account'']}}' | |
- --dataflow_disk_size | |
- '{{$.inputs.parameters[''dataflow_disk_size'']}}' | |
- --dataflow_machine_type | |
- '{{$.inputs.parameters[''dataflow_machine_type'']}}' | |
- --dataflow_workers_num | |
- '{{$.inputs.parameters[''dataflow_workers_num'']}}' | |
- --dataflow_max_workers_num | |
- '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' | |
- --dataflow_subnetwork | |
- '{{$.inputs.parameters[''dataflow_subnetwork'']}}' | |
- --dataflow_use_public_ips | |
- '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' | |
- --kms_key_name | |
- '{{$.inputs.parameters[''encryption_spec_key_name'']}}' | |
- --output_metrics_gcs_path | |
- '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --executor_input | |
- '{{$}}' | |
command: | |
- python | |
- /main.py | |
image: gcr.io/ml-pipeline/model-evaluation:v0.4 | |
exec-model-evaluation-2: | |
container: | |
args: | |
- --setup_file | |
- /setup.py | |
- --json_mode | |
- 'true' | |
- --project_id | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --problem_type | |
- '{{$.inputs.parameters[''problem_type'']}}' | |
- --batch_prediction_format | |
- '{{$.inputs.parameters[''predictions_format'']}}' | |
- --batch_prediction_gcs_source | |
- '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' | |
- --ground_truth_format | |
- '{{$.inputs.parameters[''ground_truth_format'']}}' | |
- --key_prefix_in_prediction_dataset | |
- instance | |
- --root_dir | |
- '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' | |
- --classification_type | |
- multiclass | |
- --ground_truth_column | |
- instance.{{$.inputs.parameters['ground_truth_column']}} | |
- --prediction_score_column | |
- '{{$.inputs.parameters[''prediction_score_column'']}}' | |
- --prediction_label_column | |
- '{{$.inputs.parameters[''prediction_label_column'']}}' | |
- --prediction_id_column | |
- '' | |
- --example_weight_column | |
- '' | |
- --generate_feature_attribution | |
- 'false' | |
- --dataflow_job_prefix | |
- evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} | |
- --dataflow_service_account | |
- '{{$.inputs.parameters[''dataflow_service_account'']}}' | |
- --dataflow_disk_size | |
- '{{$.inputs.parameters[''dataflow_disk_size'']}}' | |
- --dataflow_machine_type | |
- '{{$.inputs.parameters[''dataflow_machine_type'']}}' | |
- --dataflow_workers_num | |
- '{{$.inputs.parameters[''dataflow_workers_num'']}}' | |
- --dataflow_max_workers_num | |
- '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' | |
- --dataflow_subnetwork | |
- '{{$.inputs.parameters[''dataflow_subnetwork'']}}' | |
- --dataflow_use_public_ips | |
- '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' | |
- --kms_key_name | |
- '{{$.inputs.parameters[''encryption_spec_key_name'']}}' | |
- --output_metrics_gcs_path | |
- '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --executor_input | |
- '{{$}}' | |
command: | |
- python | |
- /main.py | |
image: gcr.io/ml-pipeline/model-evaluation:v0.4 | |
exec-model-evaluation-3: | |
container: | |
args: | |
- --setup_file | |
- /setup.py | |
- --json_mode | |
- 'true' | |
- --project_id | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --problem_type | |
- '{{$.inputs.parameters[''problem_type'']}}' | |
- --batch_prediction_format | |
- '{{$.inputs.parameters[''predictions_format'']}}' | |
- --batch_prediction_gcs_source | |
- '{{$.inputs.artifacts[''batch_prediction_job''].metadata[''gcsOutputDirectory'']}}' | |
- --ground_truth_format | |
- '{{$.inputs.parameters[''ground_truth_format'']}}' | |
- --key_prefix_in_prediction_dataset | |
- instance | |
- --root_dir | |
- '{{$.inputs.parameters[''root_dir'']}}/{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}' | |
- --classification_type | |
- multiclass | |
- --ground_truth_column | |
- instance.{{$.inputs.parameters['ground_truth_column']}} | |
- --prediction_score_column | |
- '{{$.inputs.parameters[''prediction_score_column'']}}' | |
- --prediction_label_column | |
- '{{$.inputs.parameters[''prediction_label_column'']}}' | |
- --prediction_id_column | |
- '' | |
- --example_weight_column | |
- '' | |
- --generate_feature_attribution | |
- 'false' | |
- --dataflow_job_prefix | |
- evaluation-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}} | |
- --dataflow_service_account | |
- '{{$.inputs.parameters[''dataflow_service_account'']}}' | |
- --dataflow_disk_size | |
- '{{$.inputs.parameters[''dataflow_disk_size'']}}' | |
- --dataflow_machine_type | |
- '{{$.inputs.parameters[''dataflow_machine_type'']}}' | |
- --dataflow_workers_num | |
- '{{$.inputs.parameters[''dataflow_workers_num'']}}' | |
- --dataflow_max_workers_num | |
- '{{$.inputs.parameters[''dataflow_max_workers_num'']}}' | |
- --dataflow_subnetwork | |
- '{{$.inputs.parameters[''dataflow_subnetwork'']}}' | |
- --dataflow_use_public_ips | |
- '{{$.inputs.parameters[''dataflow_use_public_ips'']}}' | |
- --kms_key_name | |
- '{{$.inputs.parameters[''encryption_spec_key_name'']}}' | |
- --output_metrics_gcs_path | |
- '{{$.outputs.artifacts[''evaluation_metrics''].uri}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --executor_input | |
- '{{$}}' | |
command: | |
- python | |
- /main.py | |
image: gcr.io/ml-pipeline/model-evaluation:v0.4 | |
exec-model-evaluation-import: | |
container: | |
args: | |
- '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", | |
"--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' | |
- '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' | |
- '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", | |
"{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' | |
- '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", | |
"{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' | |
- '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", | |
"{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' | |
- '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", | |
"{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' | |
- '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", | |
"{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' | |
- '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", | |
"{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' | |
- '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", | |
"{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' | |
- '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", | |
"{{$.inputs.parameters[''problem_type'']}}"]}}' | |
- --display_name | |
- '{{$.inputs.parameters[''display_name'']}}' | |
- --dataset_path | |
- '{{$.inputs.parameters[''dataset_path'']}}' | |
- --dataset_paths | |
- '{{$.inputs.parameters[''dataset_paths'']}}' | |
- --dataset_type | |
- '{{$.inputs.parameters[''dataset_type'']}}' | |
- --pipeline_job_id | |
- '{{$.pipeline_job_uuid}}' | |
- --pipeline_job_resource_name | |
- '{{$.pipeline_job_resource_name}}' | |
- --model_name | |
- '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
command: | |
- python3 | |
- -u | |
- -m | |
- google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation | |
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 | |
exec-model-evaluation-import-2: | |
container: | |
args: | |
- '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", | |
"--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' | |
- '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' | |
- '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", | |
"{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' | |
- '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", | |
"{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' | |
- '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", | |
"{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' | |
- '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", | |
"{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' | |
- '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", | |
"{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' | |
- '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", | |
"{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' | |
- '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", | |
"{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' | |
- '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", | |
"{{$.inputs.parameters[''problem_type'']}}"]}}' | |
- --display_name | |
- '{{$.inputs.parameters[''display_name'']}}' | |
- --dataset_path | |
- '{{$.inputs.parameters[''dataset_path'']}}' | |
- --dataset_paths | |
- '{{$.inputs.parameters[''dataset_paths'']}}' | |
- --dataset_type | |
- '{{$.inputs.parameters[''dataset_type'']}}' | |
- --pipeline_job_id | |
- '{{$.pipeline_job_uuid}}' | |
- --pipeline_job_resource_name | |
- '{{$.pipeline_job_resource_name}}' | |
- --model_name | |
- '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
command: | |
- python3 | |
- -u | |
- -m | |
- google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation | |
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 | |
exec-model-evaluation-import-3: | |
container: | |
args: | |
- '{"IfPresent": {"InputName": "metrics", "Then": ["--metrics", "{{$.inputs.artifacts[''metrics''].uri}}", | |
"--metrics_explanation", "{{$.inputs.artifacts[''metrics''].metadata[''explanation_gcs_path'']}}"]}}' | |
- '{"IfPresent": {"InputName": "explanation", "Then": ["--explanation", "{{$.inputs.artifacts[''explanation''].metadata[''explanation_gcs_path'']}}"]}}' | |
- '{"IfPresent": {"InputName": "classification_metrics", "Then": ["--classification_metrics", | |
"{{$.inputs.artifacts[''classification_metrics''].uri}}"]}}' | |
- '{"IfPresent": {"InputName": "forecasting_metrics", "Then": ["--forecasting_metrics", | |
"{{$.inputs.artifacts[''forecasting_metrics''].uri}}"]}}' | |
- '{"IfPresent": {"InputName": "regression_metrics", "Then": ["--regression_metrics", | |
"{{$.inputs.artifacts[''regression_metrics''].uri}}"]}}' | |
- '{"IfPresent": {"InputName": "text_generation_metrics", "Then": ["--text_generation_metrics", | |
"{{$.inputs.artifacts[''text_generation_metrics''].uri}}"]}}' | |
- '{"IfPresent": {"InputName": "question_answering_metrics", "Then": ["--question_answering_metrics", | |
"{{$.inputs.artifacts[''question_answering_metrics''].uri}}"]}}' | |
- '{"IfPresent": {"InputName": "summarization_metrics", "Then": ["--summarization_metrics", | |
"{{$.inputs.artifacts[''summarization_metrics''].uri}}"]}}' | |
- '{"IfPresent": {"InputName": "feature_attributions", "Then": ["--feature_attributions", | |
"{{$.inputs.artifacts[''feature_attributions''].uri}}"]}}' | |
- '{"IfPresent": {"InputName": "problem_type", "Then": ["--problem_type", | |
"{{$.inputs.parameters[''problem_type'']}}"]}}' | |
- --display_name | |
- '{{$.inputs.parameters[''display_name'']}}' | |
- --dataset_path | |
- '{{$.inputs.parameters[''dataset_path'']}}' | |
- --dataset_paths | |
- '{{$.inputs.parameters[''dataset_paths'']}}' | |
- --dataset_type | |
- '{{$.inputs.parameters[''dataset_type'']}}' | |
- --pipeline_job_id | |
- '{{$.pipeline_job_uuid}}' | |
- --pipeline_job_resource_name | |
- '{{$.pipeline_job_resource_name}}' | |
- --model_name | |
- '{{$.inputs.artifacts[''model''].metadata[''resourceName'']}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
command: | |
- python3 | |
- -u | |
- -m | |
- google_cloud_pipeline_components.container.experimental.evaluation.import_model_evaluation | |
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:2.0.0b3 | |
exec-model-upload: | |
container: | |
args: | |
- --type | |
- UploadModel | |
- --payload | |
- '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", | |
"\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", | |
"\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", | |
", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", | |
"}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", | |
"\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", | |
"\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' | |
- --project | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --executor_input | |
- '{{$}}' | |
- '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", | |
"{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' | |
command: | |
- python3 | |
- -u | |
- -m | |
- launcher | |
image: gcr.io/ml-pipeline/automl-tables-private:1.0.15 | |
exec-model-upload-2: | |
container: | |
args: | |
- --type | |
- UploadModel | |
- --payload | |
- '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", | |
"\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", | |
"\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", | |
", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", | |
"}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", | |
"\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", | |
"\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' | |
- --project | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --executor_input | |
- '{{$}}' | |
- '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", | |
"{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' | |
command: | |
- python3 | |
- -u | |
- -m | |
- launcher | |
image: gcr.io/ml-pipeline/automl-tables-private:1.0.15 | |
exec-model-upload-3: | |
container: | |
args: | |
- --type | |
- UploadModel | |
- --payload | |
- '{"Concat": ["{", "\"display_name\": \"", "{{$.inputs.parameters[''display_name'']}}", | |
"\"", ", \"description\": \"", "{{$.inputs.parameters[''description'']}}", | |
"\"", ", \"explanation_spec\": {", "\"parameters\": ", "{{$.inputs.parameters[''explanation_parameters'']}}", | |
", \"metadata\": ", "{{$.inputs.parameters[''explanation_metadata'']}}", | |
"}", ", \"explanation_metadata_artifact\": \"", "{{$.inputs.artifacts[''explanation_metadata_artifact''].uri}}", | |
"\"", ", \"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", | |
"\"}", ", \"labels\": ", "{{$.inputs.parameters[''labels'']}}", "}"]}' | |
- --project | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --executor_input | |
- '{{$}}' | |
- '{"IfPresent": {"InputName": "parent_model", "Then": ["--parent_model_name", | |
"{{$.inputs.artifacts[''parent_model''].metadata[''resourceName'']}}"]}}' | |
command: | |
- python3 | |
- -u | |
- -m | |
- launcher | |
image: gcr.io/ml-pipeline/automl-tables-private:1.0.15 | |
exec-read-input-uri: | |
container: | |
args: | |
- --executor_input | |
- '{{$}}' | |
- --function_to_execute | |
- _read_input_uri | |
command: | |
- sh | |
- -c | |
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ | |
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ | |
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ | |
\ && \"$0\" \"$@\"\n" | |
- sh | |
- -ec | |
- 'program_path=$(mktemp -d) | |
printf "%s" "$0" > "$program_path/ephemeral_component.py" | |
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" | |
' | |
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ | |
\ *\n\ndef _read_input_uri(\n split_uri: dsl.InputPath('Dataset'),\n\ | |
) -> list:  # Required by KFP; pylint:disable=g-bare-generic\n  \"\"\"Reads\ | |
\ the batch prediction input file patterns from a Dataset split.\n\n  Args:\n    split_uri:\ | |
\ The path to the file that contains the Dataset data.\n\n  Returns:\n    The\ | |
\ list of strings that represent the batch prediction input files.\n  \"\ | |
\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ | |
\ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ | |
\ with open(split_uri, 'r') as f:\n data_source = json.loads(f.read())\n\ | |
\ return data_source['tf_record_data_source']['file_patterns']\n\n" | |
image: python:3.7 | |
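# Illustration only (hypothetical path): the split file read by _read_input_uri
# above is expected to be JSON containing a tf_record_data_source entry, e.g.
#   {"tf_record_data_source": {"file_patterns": ["gs://example-bucket/materialized/test-*"]}}
# and the component returns the "file_patterns" list for use as batch prediction
# input URIs.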
exec-read-input-uri-2: | |
container: | |
args: | |
- --executor_input | |
- '{{$}}' | |
- --function_to_execute | |
- _read_input_uri | |
command: | |
- sh | |
- -c | |
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ | |
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ | |
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ | |
\ && \"$0\" \"$@\"\n" | |
- sh | |
- -ec | |
- 'program_path=$(mktemp -d) | |
printf "%s" "$0" > "$program_path/ephemeral_component.py" | |
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" | |
' | |
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ | |
\ *\n\ndef _read_input_uri(\n split_uri: dsl.InputPath('Dataset'),\n\ | |
) -> list: # Required by KFP; pylint:disable=g-bare-generic\n \"\"\"Construct\ | |
\ Dataset based on the batch prediction job.\n\n Args:\n split_uri:\ | |
\ The path to the file that contains Dataset data.\n\n Returns:\n The\ | |
\ list of strings that represent the batch prediction input files.\n \"\ | |
\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ | |
\ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ | |
\ with open(split_uri, 'r') as f:\n data_source = json.loads(f.read())\n\ | |
\ return data_source['tf_record_data_source']['file_patterns']\n\n" | |
image: python:3.7 | |
exec-set-optional-inputs: | |
container: | |
args: | |
- --executor_input | |
- '{{$}}' | |
- --function_to_execute | |
- _set_optional_inputs | |
command: | |
- sh | |
- -c | |
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ | |
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ | |
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ | |
\ && \"$0\" \"$@\"\n" | |
- sh | |
- -ec | |
- 'program_path=$(mktemp -d) | |
printf "%s" "$0" > "$program_path/ephemeral_component.py" | |
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" | |
' | |
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ | |
\ *\n\ndef _set_optional_inputs(\n project: str,\n location: str,\n\ | |
\ data_source_csv_filenames: str,\n data_source_bigquery_table_path:\ | |
\ str,\n vertex_dataset: dsl.Input[dsl.Artifact],\n model_display_name:\ | |
\ str,\n) -> NamedTuple(\n 'Outputs',\n [\n ('data_source_csv_filenames',\ | |
\ str),\n ('data_source_bigquery_table_path', str),\n ('model_display_name',\ | |
\ str),\n ],\n):\n \"\"\"Get the data source URI.\n\n Args:\n project:\ | |
\ The GCP project that runs the pipeline components.\n location: The\ | |
\ GCP region that runs the pipeline components.\n data_source_csv_filenames:\ | |
\ The CSV GCS path when data source is CSV.\n data_source_bigquery_table_path:\ | |
\ The BigQuery table when data source is BQ.\n vertex_dataset: The Vertex\ | |
\ dataset when data source is Vertex dataset.\n model_display_name: The\ | |
\ uploaded model's display name.\n\n Returns:\n A named tuple of CSV\ | |
\ or BQ URI.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ | |
\ import collections\n from google.cloud import aiplatform\n from google.cloud\ | |
\ import aiplatform_v1beta1 as aip\n import uuid\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name\n\ | |
\n if not model_display_name:\n model_display_name = f'tabular-workflow-model-{uuid.uuid4()}'\n\ | |
\n if vertex_dataset is not None:\n # of format\n # projects/294348452381/locations/us-central1/datasets/7104764862735056896\n\ | |
\ dataset_name = vertex_dataset.metadata['resourceName']\n\n aiplatform.init(project=project,\ | |
\ location=location)\n client = aip.DatasetServiceClient(\n client_options={'api_endpoint':\ | |
\ f'{location}-aiplatform.googleapis.com'}\n )\n dataset = client.get_dataset(name=dataset_name)\n\ | |
\ input_config = dataset.metadata['inputConfig']\n print(input_config)\n\ | |
\ if 'gcsSource' in input_config:\n data_source_csv_filenames =\ | |
\ ','.join(input_config['gcsSource']['uri'])\n elif 'bigquerySource'\ | |
\ in input_config:\n data_source_bigquery_table_path = input_config['bigquerySource']['uri']\n\ | |
\ elif data_source_csv_filenames:\n pass\n elif data_source_bigquery_table_path:\n\ | |
\ pass\n else:\n raise ValueError(\n 'One of vertex_dataset,\ | |
\ data_source_csv_filenames,'\n ' data_source_bigquery_table_path\ | |
\ must be specified'\n )\n\n return collections.namedtuple(\n 'Outputs',\n\ | |
\ [\n 'data_source_csv_filenames',\n 'data_source_bigquery_table_path',\n\ | |
\ 'model_display_name',\n ],\n )(\n data_source_csv_filenames,\n\ | |
\ data_source_bigquery_table_path,\n model_display_name,\n )\n\ | |
\n" | |
image: us-docker.pkg.dev/vertex-ai/automl-tabular/kfp-v2-base:20230817_0125 | |
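# For readability, the data-source resolution logic of the inline component
# above is roughly the following (condensed from the escaped Python; a sketch,
# not a verbatim copy):
#
#   import uuid
#   from google.cloud import aiplatform
#   from google.cloud import aiplatform_v1beta1 as aip
#
#   def _set_optional_inputs(project, location, data_source_csv_filenames,
#                            data_source_bigquery_table_path, vertex_dataset,
#                            model_display_name):
#       # Default the model display name when the caller left it empty.
#       if not model_display_name:
#           model_display_name = f'tabular-workflow-model-{uuid.uuid4()}'
#       if vertex_dataset is not None:
#           # Look up the managed dataset and read its input config.
#           aiplatform.init(project=project, location=location)
#           client = aip.DatasetServiceClient(
#               client_options={'api_endpoint': f'{location}-aiplatform.googleapis.com'})
#           input_config = client.get_dataset(
#               name=vertex_dataset.metadata['resourceName']).metadata['inputConfig']
#           if 'gcsSource' in input_config:
#               data_source_csv_filenames = ','.join(input_config['gcsSource']['uri'])
#           elif 'bigquerySource' in input_config:
#               data_source_bigquery_table_path = input_config['bigquerySource']['uri']
#       elif not (data_source_csv_filenames or data_source_bigquery_table_path):
#           raise ValueError('One of vertex_dataset, data_source_csv_filenames,'
#                            ' data_source_bigquery_table_path must be specified')
#       return (data_source_csv_filenames, data_source_bigquery_table_path,
#               model_display_name)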
exec-string-not-empty: | |
container: | |
args: | |
- --executor_input | |
- '{{$}}' | |
- --function_to_execute | |
- _string_not_empty | |
command: | |
- sh | |
- -c | |
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ | |
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ | |
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ | |
\ && \"$0\" \"$@\"\n" | |
- sh | |
- -ec | |
- 'program_path=$(mktemp -d) | |
printf "%s" "$0" > "$program_path/ephemeral_component.py" | |
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" | |
' | |
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ | |
\ *\n\ndef _string_not_empty(value: str) -> str:\n \"\"\"Check if the input\ | |
\ string value is not empty.\n\n Args:\n value: String value to be checked.\n\ | |
\n Returns:\n Boolean value. -> 'true' if not empty, 'false' if empty.\ | |
\ We need to use str\n instead of bool due to a limitation in KFP compiler.\n\ | |
\ \"\"\"\n return 'true' if value else 'false'\n\n" | |
image: python:3.7 | |
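# For readability, the inline component above is equivalent to the following
# Python (unescaped from the YAML string):
#
#   def _string_not_empty(value: str) -> str:
#       """Return 'true' if the input string is non-empty, 'false' otherwise.
#
#       A str is used instead of bool due to a limitation in the KFP compiler.
#       """
#       return 'true' if value else 'false'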
exec-tabular-stats-and-example-gen: | |
container: | |
args: | |
- --type | |
- CustomJob | |
- --project | |
- '{{$.inputs.parameters[''project'']}}' | |
- --location | |
- '{{$.inputs.parameters[''location'']}}' | |
- --gcp_resources | |
- '{{$.outputs.parameters[''gcp_resources''].output_file}}' | |
- --payload | |
- '{"Concat": ["{\"display_name\": \"tabular-stats-and-example-gen-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}\", | |
\"encryption_spec\": {\"kms_key_name\":\"", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", | |
"\"}, \"job_spec\": {\"worker_pool_specs\": [{\"replica_count\": 1, \"machine_spec\": | |
{\"machine_type\": \"n1-standard-8\"}, \"container_spec\": {\"image_uri\":\"", | |
"us-docker.pkg.dev/vertex-ai-restricted/automl-tabular/training:20230817_0125", "\", | |
\"args\": [\"stats_generator\",", "\"--train_spec={\\\"prediction_type\\\": | |
\\\"", "{{$.inputs.parameters[''prediction_type'']}}", "\\\", \\\"target_column\\\": | |
\\\"", "{{$.inputs.parameters[''target_column_name'']}}", "\\\", \\\"optimization_objective\\\": | |
\\\"", "{{$.inputs.parameters[''optimization_objective'']}}", "\\\", \\\"weight_column_name\\\": | |
\\\"", "{{$.inputs.parameters[''weight_column_name'']}}", "\\\", \\\"transformations\\\": | |
", "{{$.inputs.parameters[''transformations'']}}", ", \\\"quantiles\\\": | |
", "{{$.inputs.parameters[''quantiles'']}}", ", \\\"enable_probabilistic_inference\\\": | |
", "{{$.inputs.parameters[''enable_probabilistic_inference'']}}", "}\", | |
\"--transformations_override_path=", "{{$.inputs.parameters[''transformations_path'']}}", | |
"\", \"--data_source_csv_filenames=", "{{$.inputs.parameters[''data_source_csv_filenames'']}}", | |
"\", \"--data_source_bigquery_table_path=", "{{$.inputs.parameters[''data_source_bigquery_table_path'']}}", | |
"\", \"--predefined_split_key=", "{{$.inputs.parameters[''predefined_split_key'']}}", | |
"\", \"--timestamp_split_key=", "{{$.inputs.parameters[''timestamp_split_key'']}}", | |
"\", \"--stratified_split_key=", "{{$.inputs.parameters[''stratified_split_key'']}}", | |
"\", \"--training_fraction=", "{{$.inputs.parameters[''training_fraction'']}}", | |
"\", \"--validation_fraction=", "{{$.inputs.parameters[''validation_fraction'']}}", | |
"\", \"--test_fraction=", "{{$.inputs.parameters[''test_fraction'']}}", | |
"\", \"--target_column=", "{{$.inputs.parameters[''target_column_name'']}}", | |
"\", \"--request_type=", "{{$.inputs.parameters[''request_type'']}}", "\", | |
\"--optimization_objective_recall_value=", "{{$.inputs.parameters[''optimization_objective_recall_value'']}}", | |
"\", \"--optimization_objective_precision_value=", "{{$.inputs.parameters[''optimization_objective_precision_value'']}}", | |
"\", \"--example_gen_gcs_output_prefix=", "{{$.inputs.parameters[''root_dir'']}}", | |
"/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/example_gen_output\", | |
\"--dataset_stats_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/stats/\", | |
\"--stats_result_path=", "{{$.outputs.artifacts[''dataset_stats''].uri}}", | |
"\", \"--dataset_schema_path=", "{{$.outputs.artifacts[''dataset_schema''].uri}}", | |
"\", \"--job_name=tabular-stats-and-example-gen-{{$.pipeline_job_uuid}}-{{$.pipeline_task_uuid}}", | |
"\", \"--dataflow_project=", "{{$.inputs.parameters[''project'']}}", "\", | |
\"--error_file_path=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/error.pb\", | |
\"--dataflow_staging_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_staging\", | |
\"--dataflow_tmp_dir=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/{{$.pipeline_task_uuid}}/dataflow_tmp\", | |
\"--dataflow_max_num_workers=", "{{$.inputs.parameters[''dataflow_max_num_workers'']}}", | |
"\", \"--dataflow_worker_container_image=", "us-docker.pkg.dev/vertex-ai/automl-tabular/dataflow-worker:20230817_0125", | |
"\", \"--dataflow_machine_type=", "{{$.inputs.parameters[''dataflow_machine_type'']}}", | |
"\", \"--dataflow_disk_size_gb=", "{{$.inputs.parameters[''dataflow_disk_size_gb'']}}", | |
"\", \"--dataflow_kms_key=", "{{$.inputs.parameters[''encryption_spec_key_name'']}}", | |
"\", \"--dataflow_subnetwork_fully_qualified=", "{{$.inputs.parameters[''dataflow_subnetwork'']}}", | |
"\", \"--dataflow_use_public_ips=", "{{$.inputs.parameters[''dataflow_use_public_ips'']}}", | |
"\", \"--dataflow_service_account=", "{{$.inputs.parameters[''dataflow_service_account'']}}", | |
"\", \"--is_distill=", "{{$.inputs.parameters[''run_distillation'']}}", | |
"\", \"--additional_experiments=", "{{$.inputs.parameters[''additional_experiments'']}}", | |
"\", \"--metadata_path=", "{{$.outputs.artifacts[''metadata''].uri}}", "\", | |
\"--train_split=", "{{$.outputs.artifacts[''train_split''].uri}}", "\", | |
\"--eval_split=", "{{$.outputs.artifacts[''eval_split''].uri}}", "\", \"--test_split=", | |
"{{$.outputs.artifacts[''test_split''].uri}}", "\", \"--test_split_for_batch_prediction_component=", | |
"{{$.outputs.parameters[''test_split_json''].output_file}}", "\", \"--downsampled_test_split_for_batch_prediction_component=", | |
"{{$.outputs.parameters[''downsampled_test_split_json''].output_file}}", | |
"\", \"--instance_baseline_path=", "{{$.outputs.artifacts[''instance_baseline''].uri}}", | |
"\", \"--lro_job_info=", "{{$.inputs.parameters[''root_dir'']}}", "/{{$.pipeline_job_uuid}}/lro\", | |
\"--gcp_resources_path=", "{{$.outputs.parameters[''gcp_resources''].output_file}}", | |
"\", \"--parse_json=true\", \"--generate_additional_downsample_test_split=true\", | |
\"--executor_input={{$.json_escape[1]}}\"]}}]}}"]}' | |
command: | |
- python3 | |
- -u | |
- -m | |
- google_cloud_pipeline_components.container.v1.custom_job.launcher | |
image: gcr.io/ml-pipeline/google-cloud-pipeline-components:1.0.44 | |
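# Editor's note: the Concat payload above expands into a Vertex AI CustomJob
# spec whose container runs the stats_generator entrypoint. As an illustrative
# sketch (values are hypothetical placeholders, not defaults of this pipeline),
# the embedded --train_spec argument resolves to JSON of the form:
#
#   {
#     "prediction_type": "classification",
#     "target_column": "label",
#     "optimization_objective": "maximize-au-roc",
#     "weight_column_name": "",
#     "transformations": [...],
#     "quantiles": [],
#     "enable_probabilistic_inference": false
#   }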
exec-write-bp-result-path: | |
container: | |
args: | |
- --executor_input | |
- '{{$}}' | |
- --function_to_execute | |
- _write_bp_result_path | |
command: | |
- sh | |
- -c | |
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ | |
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ | |
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ | |
\ && \"$0\" \"$@\"\n" | |
- sh | |
- -ec | |
- 'program_path=$(mktemp -d) | |
printf "%s" "$0" > "$program_path/ephemeral_component.py" | |
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" | |
' | |
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ | |
\ *\n\ndef _write_bp_result_path(\n bp_job: dsl.Input[dsl.Artifact],\n\ | |
\ result: dsl.OutputPath('Dataset'),\n):\n \"\"\"Construct Dataset based\ | |
\ on the batch prediction job.\n\n Args:\n bp_job: The batch prediction\ | |
\ job artifact.\n result: The path to the file that contains Dataset\ | |
\ data.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ | |
\ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ | |
\ directory = bp_job.metadata['gcsOutputDirectory']\n data_source = {\n\ | |
\ 'tf_record_data_source': {\n 'file_patterns': [\n \ | |
\ f'{directory}/prediction.results-*',\n ],\n 'coder':\ | |
\ 'PROTO_VALUE',\n },\n }\n with open(result, 'w') as f:\n f.write(json.dumps(data_source))\n\ | |
\n" | |
image: python:3.7 | |
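# For readability, the inline component above is equivalent to the following
# Python (unescaped from the YAML string; comments added):
#
#   import json
#   from kfp import dsl
#
#   def _write_bp_result_path(bp_job: dsl.Input[dsl.Artifact],
#                             result: dsl.OutputPath('Dataset')):
#       """Write a Dataset JSON that points at the batch prediction results."""
#       directory = bp_job.metadata['gcsOutputDirectory']
#       data_source = {
#           'tf_record_data_source': {
#               # Glob over the sharded prediction output files.
#               'file_patterns': [f'{directory}/prediction.results-*'],
#               'coder': 'PROTO_VALUE',
#           },
#       }
#       with open(result, 'w') as f:
#           f.write(json.dumps(data_source))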
exec-write-bp-result-path-2: | |
container: | |
args: | |
- --executor_input | |
- '{{$}}' | |
- --function_to_execute | |
- _write_bp_result_path | |
command: | |
- sh | |
- -c | |
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ | |
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ | |
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.0.0-beta.17'\ | |
\ && \"$0\" \"$@\"\n" | |
- sh | |
- -ec | |
- 'program_path=$(mktemp -d) | |
printf "%s" "$0" > "$program_path/ephemeral_component.py" | |
python3 -m kfp.components.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" | |
' | |
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ | |
\ *\n\ndef _write_bp_result_path(\n bp_job: dsl.Input[dsl.Artifact],\n\ | |
\ result: dsl.OutputPath('Dataset'),\n):\n \"\"\"Construct Dataset based\ | |
\ on the batch prediction job.\n\n Args:\n bp_job: The batch prediction\ | |
\ job artifact.\n result: The path to the file that contains Dataset\ | |
\ data.\n \"\"\"\n # pylint: disable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ | |
\ import json\n # pylint: enable=g-import-not-at-top,import-outside-toplevel,redefined-outer-name,reimported\n\ | |
\ directory = bp_job.metadata['gcsOutputDirectory']\n data_source = {\n\ | |
\ 'tf_record_data_source': {\n 'file_patterns': [\n \ | |
\ f'{directory}/prediction.results-*',\n ],\n 'coder':\ | |
\ 'PROTO_VALUE',\n },\n }\n with open(result, 'w') as f:\n f.write(json.dumps(data_source))\n\ | |
\n" | |
image: python:3.7 | |
pipelineInfo: | |
description: The AutoML Tabular pipeline v1. | |
name: automl-tabular | |
root: | |
dag: | |
outputs: | |
artifacts: | |
feature-attribution-2-feature_attributions: | |
artifactSelectors: | |
- outputArtifactKey: feature-attribution-2-feature_attributions | |
producerSubtask: exit-handler-1 | |
feature-attribution-3-feature_attributions: | |
artifactSelectors: | |
- outputArtifactKey: feature-attribution-3-feature_attributions | |
producerSubtask: exit-handler-1 | |
feature-attribution-feature_attributions: | |
artifactSelectors: | |
- outputArtifactKey: feature-attribution-feature_attributions | |
producerSubtask: exit-handler-1 | |
model-evaluation-2-evaluation_metrics: | |
artifactSelectors: | |
- outputArtifactKey: model-evaluation-2-evaluation_metrics | |
producerSubtask: exit-handler-1 | |
model-evaluation-3-evaluation_metrics: | |
artifactSelectors: | |
- outputArtifactKey: model-evaluation-3-evaluation_metrics | |
producerSubtask: exit-handler-1 | |
model-evaluation-evaluation_metrics: | |
artifactSelectors: | |
- outputArtifactKey: model-evaluation-evaluation_metrics | |
producerSubtask: exit-handler-1 | |
tasks: | |
automl-tabular-finalizer: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-automl-tabular-finalizer | |
dependentTasks: | |
- exit-handler-1 | |
inputs: | |
parameters: | |
location: | |
componentInputParameter: location | |
project: | |
componentInputParameter: project | |
root_dir: | |
componentInputParameter: root_dir | |
taskInfo: | |
name: automl-tabular-finalizer | |
triggerPolicy: | |
strategy: ALL_UPSTREAM_TASKS_COMPLETED | |
exit-handler-1: | |
componentRef: | |
name: comp-exit-handler-1 | |
dependentTasks: | |
- set-optional-inputs | |
inputs: | |
artifacts: | |
pipelinechannel--parent_model: | |
componentInputArtifact: parent_model | |
parameters: | |
pipelinechannel--additional_experiments: | |
componentInputParameter: additional_experiments | |
pipelinechannel--cv_trainer_worker_pool_specs_override: | |
componentInputParameter: cv_trainer_worker_pool_specs_override | |
pipelinechannel--dataflow_service_account: | |
componentInputParameter: dataflow_service_account | |
pipelinechannel--dataflow_subnetwork: | |
componentInputParameter: dataflow_subnetwork | |
pipelinechannel--dataflow_use_public_ips: | |
componentInputParameter: dataflow_use_public_ips | |
pipelinechannel--disable_early_stopping: | |
componentInputParameter: disable_early_stopping | |
pipelinechannel--distill_batch_predict_machine_type: | |
componentInputParameter: distill_batch_predict_machine_type | |
pipelinechannel--distill_batch_predict_max_replica_count: | |
componentInputParameter: distill_batch_predict_max_replica_count | |
pipelinechannel--distill_batch_predict_starting_replica_count: | |
componentInputParameter: distill_batch_predict_starting_replica_count | |
pipelinechannel--enable_probabilistic_inference: | |
componentInputParameter: enable_probabilistic_inference | |
pipelinechannel--encryption_spec_key_name: | |
componentInputParameter: encryption_spec_key_name | |
pipelinechannel--evaluation_batch_explain_machine_type: | |
componentInputParameter: evaluation_batch_explain_machine_type | |
pipelinechannel--evaluation_batch_explain_max_replica_count: | |
componentInputParameter: evaluation_batch_explain_max_replica_count | |
pipelinechannel--evaluation_batch_explain_starting_replica_count: | |
componentInputParameter: evaluation_batch_explain_starting_replica_count | |
pipelinechannel--evaluation_batch_predict_machine_type: | |
componentInputParameter: evaluation_batch_predict_machine_type | |
pipelinechannel--evaluation_batch_predict_max_replica_count: | |
componentInputParameter: evaluation_batch_predict_max_replica_count | |
pipelinechannel--evaluation_batch_predict_starting_replica_count: | |
componentInputParameter: evaluation_batch_predict_starting_replica_count | |
pipelinechannel--evaluation_dataflow_disk_size_gb: | |
componentInputParameter: evaluation_dataflow_disk_size_gb | |
pipelinechannel--evaluation_dataflow_machine_type: | |
componentInputParameter: evaluation_dataflow_machine_type | |
pipelinechannel--evaluation_dataflow_max_num_workers: | |
componentInputParameter: evaluation_dataflow_max_num_workers | |
pipelinechannel--evaluation_dataflow_starting_num_workers: | |
componentInputParameter: evaluation_dataflow_starting_num_workers | |
pipelinechannel--export_additional_model_without_custom_ops: | |
componentInputParameter: export_additional_model_without_custom_ops | |
pipelinechannel--fast_testing: | |
componentInputParameter: fast_testing | |
pipelinechannel--location: | |
componentInputParameter: location | |
pipelinechannel--model_description: | |
componentInputParameter: model_description | |
pipelinechannel--optimization_objective: | |
componentInputParameter: optimization_objective | |
pipelinechannel--optimization_objective_precision_value: | |
componentInputParameter: optimization_objective_precision_value | |
pipelinechannel--optimization_objective_recall_value: | |
componentInputParameter: optimization_objective_recall_value | |
pipelinechannel--predefined_split_key: | |
componentInputParameter: predefined_split_key | |
pipelinechannel--prediction_type: | |
componentInputParameter: prediction_type | |
pipelinechannel--project: | |
componentInputParameter: project | |
pipelinechannel--quantiles: | |
componentInputParameter: quantiles | |
pipelinechannel--root_dir: | |
componentInputParameter: root_dir | |
pipelinechannel--run_distillation: | |
componentInputParameter: run_distillation | |
pipelinechannel--run_evaluation: | |
componentInputParameter: run_evaluation | |
pipelinechannel--set-optional-inputs-data_source_bigquery_table_path: | |
taskOutputParameter: | |
outputParameterKey: data_source_bigquery_table_path | |
producerTask: set-optional-inputs | |
pipelinechannel--set-optional-inputs-data_source_csv_filenames: | |
taskOutputParameter: | |
outputParameterKey: data_source_csv_filenames | |
producerTask: set-optional-inputs | |
pipelinechannel--set-optional-inputs-model_display_name: | |
taskOutputParameter: | |
outputParameterKey: model_display_name | |
producerTask: set-optional-inputs | |
pipelinechannel--stage_1_num_parallel_trials: | |
componentInputParameter: stage_1_num_parallel_trials | |
pipelinechannel--stage_1_tuner_worker_pool_specs_override: | |
componentInputParameter: stage_1_tuner_worker_pool_specs_override | |
pipelinechannel--stage_1_tuning_result_artifact_uri: | |
componentInputParameter: stage_1_tuning_result_artifact_uri | |
pipelinechannel--stage_2_num_parallel_trials: | |
componentInputParameter: stage_2_num_parallel_trials | |
pipelinechannel--stage_2_num_selected_trials: | |
componentInputParameter: stage_2_num_selected_trials | |
pipelinechannel--stats_and_example_gen_dataflow_disk_size_gb: | |
componentInputParameter: stats_and_example_gen_dataflow_disk_size_gb | |
pipelinechannel--stats_and_example_gen_dataflow_machine_type: | |
componentInputParameter: stats_and_example_gen_dataflow_machine_type | |
pipelinechannel--stats_and_example_gen_dataflow_max_num_workers: | |
componentInputParameter: stats_and_example_gen_dataflow_max_num_workers | |
pipelinechannel--stratified_split_key: | |
componentInputParameter: stratified_split_key | |
pipelinechannel--study_spec_parameters_override: | |
componentInputParameter: study_spec_parameters_override | |
pipelinechannel--target_column: | |
componentInputParameter: target_column | |
pipelinechannel--test_fraction: | |
componentInputParameter: test_fraction | |
pipelinechannel--timestamp_split_key: | |
componentInputParameter: timestamp_split_key | |
pipelinechannel--train_budget_milli_node_hours: | |
componentInputParameter: train_budget_milli_node_hours | |
pipelinechannel--training_fraction: | |
componentInputParameter: training_fraction | |
pipelinechannel--transform_dataflow_disk_size_gb: | |
componentInputParameter: transform_dataflow_disk_size_gb | |
pipelinechannel--transform_dataflow_machine_type: | |
componentInputParameter: transform_dataflow_machine_type | |
pipelinechannel--transform_dataflow_max_num_workers: | |
componentInputParameter: transform_dataflow_max_num_workers | |
pipelinechannel--transformations: | |
componentInputParameter: transformations | |
pipelinechannel--validation_fraction: | |
componentInputParameter: validation_fraction | |
pipelinechannel--weight_column: | |
componentInputParameter: weight_column | |
taskInfo: | |
name: exit-handler-1 | |
set-optional-inputs: | |
cachingOptions: | |
enableCache: true | |
componentRef: | |
name: comp-set-optional-inputs | |
inputs: | |
artifacts: | |
vertex_dataset: | |
componentInputArtifact: vertex_dataset | |
parameters: | |
data_source_bigquery_table_path: | |
componentInputParameter: data_source_bigquery_table_path | |
data_source_csv_filenames: | |
componentInputParameter: data_source_csv_filenames | |
location: | |
componentInputParameter: location | |
model_display_name: | |
componentInputParameter: model_display_name | |
project: | |
componentInputParameter: project | |
taskInfo: | |
name: set-optional-inputs | |
inputDefinitions: | |
artifacts: | |
parent_model: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: Vertex Model to upload this model as a version of. | |
isOptional: true | |
vertex_dataset: | |
artifactType: | |
schemaTitle: system.Artifact | |
schemaVersion: 0.0.1 | |
description: The Vertex dataset artifact. | |
parameters: | |
additional_experiments: | |
description: Use this field to configure private preview features. | |
isOptional: true | |
parameterType: STRUCT | |
cv_trainer_worker_pool_specs_override: | |
description: 'The list of dictionaries for overriding the stage | |
cv trainer worker pool spec.' | |
isOptional: true | |
parameterType: LIST | |
data_source_bigquery_table_path: | |
defaultValue: '' | |
description: 'The BigQuery table path of format | |
bq://bq_project.bq_dataset.bq_table' | |
isOptional: true | |
parameterType: STRING | |
data_source_csv_filenames: | |
defaultValue: '' | |
description: 'A string that represents a list of comma | |
separated CSV filenames.' | |
isOptional: true | |
parameterType: STRING | |
dataflow_service_account: | |
defaultValue: '' | |
description: Custom service account to run dataflow jobs. | |
isOptional: true | |
parameterType: STRING | |
dataflow_subnetwork: | |
defaultValue: '' | |
description: 'Dataflow''s fully qualified subnetwork name; when empty, | |
the default subnetwork will be used. Example: | |
https://cloud.google.com/dataflow/docs/guides/specifying-networks#example_network_and_subnetwork_specifications' | |
isOptional: true | |
parameterType: STRING | |
dataflow_use_public_ips: | |
defaultValue: true | |
description: 'Specifies whether Dataflow workers use public IP | |
addresses.' | |
isOptional: true | |
parameterType: BOOLEAN | |
disable_early_stopping: | |
defaultValue: false | |
description: Whether to disable early stopping. | |
isOptional: true | |
parameterType: BOOLEAN | |
distill_batch_predict_machine_type: | |
defaultValue: n1-standard-16 | |
description: 'The prediction server machine type for | |
batch predict component in the model distillation.' | |
isOptional: true | |
parameterType: STRING | |
distill_batch_predict_max_replica_count: | |
defaultValue: 25.0 | |
description: 'The max number of prediction servers | |
for batch predict component in the model distillation.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
distill_batch_predict_starting_replica_count: | |
defaultValue: 25.0 | |
description: 'The initial number of | |
prediction servers for batch predict component in the model distillation.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
enable_probabilistic_inference: | |
defaultValue: false | |
description: 'If probabilistic inference is enabled, the | |
model will fit a distribution that captures the uncertainty of a | |
prediction. At inference time, the predictive distribution is used to make | |
a point prediction that minimizes the optimization objective. For example, | |
the mean of a predictive distribution is the point prediction that | |
minimizes RMSE loss. If quantiles are specified, then the quantiles of the | |
distribution are also returned.' | |
isOptional: true | |
parameterType: BOOLEAN | |
encryption_spec_key_name: | |
defaultValue: '' | |
description: The KMS key name. | |
isOptional: true | |
parameterType: STRING | |
evaluation_batch_explain_machine_type: | |
defaultValue: n1-highmem-8 | |
description: 'The prediction server machine type | |
for batch explain components during evaluation.' | |
isOptional: true | |
parameterType: STRING | |
evaluation_batch_explain_max_replica_count: | |
defaultValue: 10.0 | |
description: 'The max number of prediction | |
servers for batch explain components during evaluation.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
evaluation_batch_explain_starting_replica_count: | |
defaultValue: 10.0 | |
description: 'The initial number of | |
prediction servers for batch explain components during evaluation.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
evaluation_batch_predict_machine_type: | |
defaultValue: n1-highmem-8 | |
description: 'The prediction server machine type | |
for batch predict components during evaluation.' | |
isOptional: true | |
parameterType: STRING | |
evaluation_batch_predict_max_replica_count: | |
defaultValue: 20.0 | |
description: 'The max number of prediction | |
servers for batch predict components during evaluation.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
evaluation_batch_predict_starting_replica_count: | |
defaultValue: 20.0 | |
description: 'The initial number of | |
prediction servers for batch predict components during evaluation.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
evaluation_dataflow_disk_size_gb: | |
defaultValue: 50.0 | |
description: 'Dataflow worker''s disk size in GB for | |
evaluation components.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
evaluation_dataflow_machine_type: | |
defaultValue: n1-standard-4 | |
description: 'The dataflow machine type for evaluation | |
components.' | |
isOptional: true | |
parameterType: STRING | |
evaluation_dataflow_max_num_workers: | |
defaultValue: 100.0 | |
description: 'The max number of Dataflow workers for | |
evaluation components.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
evaluation_dataflow_starting_num_workers: | |
defaultValue: 10.0 | |
description: 'The initial number of Dataflow | |
workers for evaluation components.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
export_additional_model_without_custom_ops: | |
defaultValue: false | |
description: 'Whether to export an additional | |
model without custom TensorFlow operators.' | |
isOptional: true | |
parameterType: BOOLEAN | |
fast_testing: | |
defaultValue: false | |
description: Internal flag used for presubmit tests. | |
isOptional: true | |
parameterType: BOOLEAN | |
location: | |
description: The GCP region that runs the pipeline components. | |
parameterType: STRING | |
model_description: | |
defaultValue: '' | |
description: The description of the uploaded Vertex model. | |
isOptional: true | |
parameterType: STRING | |
model_display_name: | |
defaultValue: '' | |
description: The display name of the uploaded Vertex model. | |
isOptional: true | |
parameterType: STRING | |
optimization_objective: | |
description: 'For binary classification, "maximize-au-roc", | |
"minimize-log-loss", "maximize-au-prc", "maximize-precision-at-recall", | |
or | |
"maximize-recall-at-precision". For multi class classification, | |
"minimize-log-loss". For regression, "minimize-rmse", "minimize-mae", or | |
"minimize-rmsle".' | |
parameterType: STRING | |
optimization_objective_precision_value: | |
defaultValue: -1.0 | |
description: 'Required when optimization_objective | |
is ''maximize-recall-at-precision''. Must be between 0 and 1, inclusive.' | |
isOptional: true | |
parameterType: NUMBER_DOUBLE | |
optimization_objective_recall_value: | |
defaultValue: -1.0 | |
description: 'Required when optimization_objective is | |
''maximize-precision-at-recall''. Must be between 0 and 1, inclusive.' | |
isOptional: true | |
parameterType: NUMBER_DOUBLE | |
predefined_split_key: | |
defaultValue: '' | |
description: The predefined_split column name. | |
isOptional: true | |
parameterType: STRING | |
prediction_type: | |
description: 'The type of prediction the model is to produce. | |
"classification" or "regression".' | |
parameterType: STRING | |
project: | |
description: The GCP project that runs the pipeline components. | |
parameterType: STRING | |
quantiles: | |
description: 'Quantiles to use for probabilistic inference. Up to 5 quantiles | |
are allowed, with values between 0 and 1, exclusive. Quantiles must be | |
unique.' | |
isOptional: true | |
parameterType: LIST | |
root_dir: | |
description: The root GCS directory for the pipeline components. | |
parameterType: STRING | |
run_distillation: | |
defaultValue: false | |
description: 'Whether the distillation should be applied to the | |
training.' | |
isOptional: true | |
parameterType: BOOLEAN | |
run_evaluation: | |
defaultValue: false | |
description: Whether to run evaluation steps during training. | |
isOptional: true | |
parameterType: BOOLEAN | |
stage_1_num_parallel_trials: | |
defaultValue: 35.0 | |
description: Number of parallel trials for stage 1. | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
stage_1_tuner_worker_pool_specs_override: | |
description: 'The list of dictionaries for overriding the | |
stage 1 tuner worker pool spec.' | |
isOptional: true | |
parameterType: LIST | |
stage_1_tuning_result_artifact_uri: | |
defaultValue: '' | |
description: 'The stage 1 tuning result artifact GCS | |
URI.' | |
isOptional: true | |
parameterType: STRING | |
stage_2_num_parallel_trials: | |
defaultValue: 35.0 | |
description: Number of parallel trials for stage 2. | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
stage_2_num_selected_trials: | |
defaultValue: 5.0 | |
description: Number of selected trials for stage 2. | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
stats_and_example_gen_dataflow_disk_size_gb: | |
defaultValue: 40.0 | |
description: 'Dataflow worker''s disk size in | |
GB for stats_and_example_gen component.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
stats_and_example_gen_dataflow_machine_type: | |
defaultValue: n1-standard-16 | |
description: 'The dataflow machine type for | |
stats_and_example_gen component.' | |
isOptional: true | |
parameterType: STRING | |
stats_and_example_gen_dataflow_max_num_workers: | |
defaultValue: 25.0 | |
description: 'The max number of Dataflow | |
workers for stats_and_example_gen component.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
stratified_split_key: | |
defaultValue: '' | |
description: The stratified_split column name. | |
isOptional: true | |
parameterType: STRING | |
study_spec_parameters_override: | |
description: The list for overriding study spec. | |
isOptional: true | |
parameterType: LIST | |
target_column: | |
description: The target column name. | |
parameterType: STRING | |
test_fraction: | |
defaultValue: -1.0 | |
description: The test fraction. | |
isOptional: true | |
parameterType: NUMBER_DOUBLE | |
timestamp_split_key: | |
defaultValue: '' | |
description: The timestamp_split column name. | |
isOptional: true | |
parameterType: STRING | |
train_budget_milli_node_hours: | |
description: 'The train budget for creating this model, | |
expressed in milli node hours, i.e. a value of 1,000 in this field means 1 | |
node hour.' | |
parameterType: NUMBER_DOUBLE | |
training_fraction: | |
defaultValue: -1.0 | |
description: The training fraction. | |
isOptional: true | |
parameterType: NUMBER_DOUBLE | |
transform_dataflow_disk_size_gb: | |
defaultValue: 40.0 | |
description: 'Dataflow worker''s disk size in GB for | |
transform component.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
transform_dataflow_machine_type: | |
defaultValue: n1-standard-16 | |
description: 'The dataflow machine type for transform | |
component.' | |
isOptional: true | |
parameterType: STRING | |
transform_dataflow_max_num_workers: | |
defaultValue: 25.0 | |
description: 'The max number of Dataflow workers for | |
transform component.' | |
isOptional: true | |
parameterType: NUMBER_INTEGER | |
transformations: | |
description: 'The path to a GCS file containing the transformations to | |
apply.' | |
parameterType: STRING | |
validation_fraction: | |
defaultValue: -1.0 | |
description: The validation fraction. | |
isOptional: true | |
parameterType: NUMBER_DOUBLE | |
weight_column: | |
defaultValue: '' | |
description: The weight column name. | |
isOptional: true | |
parameterType: STRING | |
outputDefinitions: | |
artifacts: | |
feature-attribution-2-feature_attributions: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
feature-attribution-3-feature_attributions: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
feature-attribution-feature_attributions: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
model-evaluation-2-evaluation_metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
model-evaluation-3-evaluation_metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
model-evaluation-evaluation_metrics: | |
artifactType: | |
schemaTitle: system.Metrics | |
schemaVersion: 0.0.1 | |
schemaVersion: 2.1.0 | |
sdkVersion: kfp-2.0.0-beta.17 |
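# Editor's note: a minimal sketch of submitting this compiled pipeline with the
# Vertex AI SDK (google-cloud-aiplatform). All values below (project, region,
# bucket, data paths) are hypothetical placeholders; artifact inputs such as
# vertex_dataset and parent_model are omitted here, and in practice the
# parameter set is usually generated with the Google Cloud Pipeline Components
# tabular utilities.
#
#   from google.cloud import aiplatform
#
#   aiplatform.init(project='my-project', location='us-central1',
#                   staging_bucket='gs://my-bucket')
#   job = aiplatform.PipelineJob(
#       display_name='automl-tabular',
#       template_path='automl-tabular.yaml',  # this file
#       pipeline_root='gs://my-bucket/pipeline_root',
#       parameter_values={
#           'project': 'my-project',
#           'location': 'us-central1',
#           'root_dir': 'gs://my-bucket/pipeline_root',
#           'target_column': 'label',
#           'prediction_type': 'classification',
#           'optimization_objective': 'maximize-au-roc',
#           'transformations': 'gs://my-bucket/transformations.json',
#           'train_budget_milli_node_hours': 1000,
#           'data_source_csv_filenames': 'gs://my-bucket/data.csv',
#       },
#   )
#   job.submit()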